diff --git a/lapack-netlib/SRC/zgeqrt2.c b/lapack-netlib/SRC/zgeqrt2.c new file mode 100644 index 000000000..095693e8d --- /dev/null +++ b/lapack-netlib/SRC/zgeqrt2.c @@ -0,0 +1,661 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGEQRT2 computes a QR factorization of a general real or complex matrix using the compact WY re +presentation of Q. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGEQRT2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGEQRT2( M, N, A, LDA, T, LDT, INFO ) */ + +/* INTEGER INFO, LDA, LDT, M, N */ +/* COMPLEX*16 A( LDA, * ), T( LDT, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGEQRT2 computes a QR factorization of a complex M-by-N matrix A, */ +/* > using the compact WY representation of Q. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the complex M-by-N matrix A. On exit, the elements on and */ +/* > above the diagonal contain the N-by-N upper triangular matrix R; the */ +/* > elements below the diagonal are the columns of V. See below for */ +/* > further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is COMPLEX*16 array, dimension (LDT,N) */ +/* > The N-by-N upper triangular factor of the block reflector. */ +/* > The elements on and above the diagonal contain the block */ +/* > reflector T; the elements below the diagonal are not used. */ +/* > See below for further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix V stores the elementary reflectors H(i) in the i-th column */ +/* > below the diagonal. For example, if M=5 and N=3, the matrix V is */ +/* > */ +/* > V = ( 1 ) */ +/* > ( v1 1 ) */ +/* > ( v1 v2 1 ) */ +/* > ( v1 v2 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > */ +/* > where the vi's represent the vectors which define H(i), which are returned */ +/* > in the matrix A. The 1's along the diagonal of V are not stored in A. The */ +/* > block reflector H is then given by */ +/* > */ +/* > H = I - V * T * V**H */ +/* > */ +/* > where V**H is the conjugate transpose of V. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zgeqrt2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *t, integer *ldt, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3; + doublecomplex z__1, z__2; + + /* Local variables */ + integer i__, k; + doublecomplex alpha; + extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), + ztrmv_(char *, char *, char *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *), + xerbla_(char *, integer *, ftnlen), zlarfg_(integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *); + doublecomplex aii; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else if (*ldt < f2cmax(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEQRT2", &i__1, (ftnlen)7); + return 0; + } + + k = f2cmin(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elem. refl. H(i) to annihilate A(i+1:m,i), tau(I) -> T(I,1) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + zlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * a_dim1] + , &c__1, &t[i__ + t_dim1]); + if (i__ < *n) { + +/* Apply H(i) to A(I:M,I+1:N) from the left */ + + i__2 = i__ + i__ * a_dim1; + aii.r = a[i__2].r, aii.i = a[i__2].i; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* W(1:N-I) := A(I:M,I+1:N)^H * A(I:M,I) [W = T(:,N)] */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + zgemv_("C", &i__2, &i__3, &c_b1, &a[i__ + (i__ + 1) * a_dim1], + lda, &a[i__ + i__ * a_dim1], &c__1, &c_b2, &t[*n * t_dim1 + + 1], &c__1); + +/* A(I:M,I+1:N) = A(I:m,I+1:N) + alpha*A(I:M,I)*W(1:N-1)^H */ + + d_cnjg(&z__2, &t[i__ + t_dim1]); + z__1.r = -z__2.r, z__1.i = -z__2.i; + alpha.r = z__1.r, alpha.i = z__1.i; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + zgerc_(&i__2, &i__3, &alpha, &a[i__ + i__ * a_dim1], &c__1, &t[*n + * t_dim1 + 1], &c__1, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + a[i__2].r = aii.r, a[i__2].i = aii.i; + } + } + + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii.r = a[i__2].r, aii.i = a[i__2].i; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* T(1:I-1,I) := alpha * A(I:M,1:I-1)**H * A(I:M,I) */ + + i__2 = i__ + t_dim1; + z__1.r = -t[i__2].r, z__1.i = -t[i__2].i; + alpha.r = z__1.r, alpha.i = z__1.i; + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + zgemv_("C", &i__2, &i__3, &alpha, &a[i__ + a_dim1], lda, &a[i__ + i__ + * a_dim1], &c__1, &c_b2, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ + i__ * a_dim1; + a[i__2].r = aii.r, a[i__2].i = aii.i; + +/* T(1:I-1,I) := T(1:I-1,1:I-1) * T(1:I-1,I) */ + + i__2 = i__ - 1; + ztrmv_("U", "N", "N", &i__2, &t[t_offset], ldt, &t[i__ * t_dim1 + 1], + &c__1); + +/* T(I,I) = tau(I) */ + + i__2 = i__ + i__ * t_dim1; + i__3 = i__ + t_dim1; + t[i__2].r = t[i__3].r, t[i__2].i = t[i__3].i; + i__2 = i__ + t_dim1; + t[i__2].r = 0., t[i__2].i = 0.; + } + +/* End of ZGEQRT2 */ + + return 0; +} /* zgeqrt2_ */ + diff --git a/lapack-netlib/SRC/zgeqrt3.c b/lapack-netlib/SRC/zgeqrt3.c new file mode 100644 index 000000000..80009bd2f --- /dev/null +++ b/lapack-netlib/SRC/zgeqrt3.c @@ -0,0 +1,693 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGEQRT3 recursively computes a QR factorization of a general real or complex matrix using the c +ompact WY representation of Q. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGEQRT3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGEQRT3( M, N, A, LDA, T, LDT, INFO ) */ + +/* INTEGER INFO, LDA, M, N, LDT */ +/* COMPLEX*16 A( LDA, * ), T( LDT, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGEQRT3 recursively computes a QR factorization of a complex M-by-N */ +/* > matrix A, using the compact WY representation of Q. */ +/* > */ +/* > Based on the algorithm of Elmroth and Gustavson, */ +/* > IBM J. Res. Develop. Vol 44 No. 4 July 2000. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the complex M-by-N matrix A. On exit, the elements on */ +/* > and above the diagonal contain the N-by-N upper triangular matrix R; */ +/* > the elements below the diagonal are the columns of V. See below for */ +/* > further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is COMPLEX*16 array, dimension (LDT,N) */ +/* > The N-by-N upper triangular factor of the block reflector. */ +/* > The elements on and above the diagonal contain the block */ +/* > reflector T; the elements below the diagonal are not used. */ +/* > See below for further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16GEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix V stores the elementary reflectors H(i) in the i-th column */ +/* > below the diagonal. For example, if M=5 and N=3, the matrix V is */ +/* > */ +/* > V = ( 1 ) */ +/* > ( v1 1 ) */ +/* > ( v1 v2 1 ) */ +/* > ( v1 v2 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > */ +/* > where the vi's represent the vectors which define H(i), which are returned */ +/* > in the matrix A. The 1's along the diagonal of V are not stored in A. The */ +/* > block reflector H is then given by */ +/* > */ +/* > H = I - V * T * V**H */ +/* > */ +/* > where V**H is the conjugate transpose of V. */ +/* > */ +/* > For details of the algorithm, see Elmroth and Gustavson (cited above). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zgeqrt3_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *t, integer *ldt, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + + /* Local variables */ + integer i__, j, iinfo; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + integer i1, j1, n1, n2; + extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + xerbla_(char *, integer *, ftnlen), zlarfg_(integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -2; + } else if (*m < *n) { + *info = -1; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else if (*ldt < f2cmax(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEQRT3", &i__1, (ftnlen)7); + return 0; + } + + if (*n == 1) { + +/* Compute Householder transform when N=1 */ + + zlarfg_(m, &a[a_dim1 + 1], &a[f2cmin(2,*m) + a_dim1], &c__1, &t[t_dim1 + + 1]); + + } else { + +/* Otherwise, split A into blocks... */ + + n1 = *n / 2; + n2 = *n - n1; +/* Computing MIN */ + i__1 = n1 + 1; + j1 = f2cmin(i__1,*n); +/* Computing MIN */ + i__1 = *n + 1; + i1 = f2cmin(i__1,*m); + +/* Compute A(1:M,1:N1) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H */ + + zgeqrt3_(m, &n1, &a[a_offset], lda, &t[t_offset], ldt, &iinfo); + +/* Compute A(1:M,J1:N) = Q1^H A(1:M,J1:N) [workspace: T(1:N1,J1:N)] */ + + i__1 = n2; + for (j = 1; j <= i__1; ++j) { + i__2 = n1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (j + n1) * t_dim1; + i__4 = i__ + (j + n1) * a_dim1; + t[i__3].r = a[i__4].r, t[i__3].i = a[i__4].i; + } + } + ztrmm_("L", "L", "C", "U", &n1, &n2, &c_b1, &a[a_offset], lda, &t[j1 * + t_dim1 + 1], ldt) + ; + + i__1 = *m - n1; + zgemm_("C", "N", &n1, &n2, &i__1, &c_b1, &a[j1 + a_dim1], lda, &a[j1 + + j1 * a_dim1], lda, &c_b1, &t[j1 * t_dim1 + 1], ldt); + + ztrmm_("L", "U", "C", "N", &n1, &n2, &c_b1, &t[t_offset], ldt, &t[j1 * + t_dim1 + 1], ldt) + ; + + i__1 = *m - n1; + z__1.r = -1., z__1.i = 0.; + zgemm_("N", "N", &i__1, &n2, &n1, &z__1, &a[j1 + a_dim1], lda, &t[j1 * + t_dim1 + 1], ldt, &c_b1, &a[j1 + j1 * a_dim1], lda); + + ztrmm_("L", "L", "N", "U", &n1, &n2, &c_b1, &a[a_offset], lda, &t[j1 * + t_dim1 + 1], ldt) + ; + + i__1 = n2; + for (j = 1; j <= i__1; ++j) { + i__2 = n1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (j + n1) * a_dim1; + i__4 = i__ + (j + n1) * a_dim1; + i__5 = i__ + (j + n1) * t_dim1; + z__1.r = a[i__4].r - t[i__5].r, z__1.i = a[i__4].i - t[i__5] + .i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + } + +/* Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H */ + + i__1 = *m - n1; + zgeqrt3_(&i__1, &n2, &a[j1 + j1 * a_dim1], lda, &t[j1 + j1 * t_dim1], + ldt, &iinfo); + +/* Compute T3 = T(1:N1,J1:N) = -T1 Y1^H Y2 T2 */ + + i__1 = n1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = n2; + for (j = 1; j <= i__2; ++j) { + i__3 = i__ + (j + n1) * t_dim1; + d_cnjg(&z__1, &a[j + n1 + i__ * a_dim1]); + t[i__3].r = z__1.r, t[i__3].i = z__1.i; + } + } + + ztrmm_("R", "L", "N", "U", &n1, &n2, &c_b1, &a[j1 + j1 * a_dim1], lda, + &t[j1 * t_dim1 + 1], ldt); + + i__1 = *m - *n; + zgemm_("C", "N", &n1, &n2, &i__1, &c_b1, &a[i1 + a_dim1], lda, &a[i1 + + j1 * a_dim1], lda, &c_b1, &t[j1 * t_dim1 + 1], ldt); + + z__1.r = -1., z__1.i = 0.; + ztrmm_("L", "U", "N", "N", &n1, &n2, &z__1, &t[t_offset], ldt, &t[j1 * + t_dim1 + 1], ldt) + ; + + ztrmm_("R", "U", "N", "N", &n1, &n2, &c_b1, &t[j1 + j1 * t_dim1], ldt, + &t[j1 * t_dim1 + 1], ldt); + +/* Y = (Y1,Y2); R = [ R1 A(1:N1,J1:N) ]; T = [T1 T3] */ +/* [ 0 R2 ] [ 0 T2] */ + + } + + return 0; + +/* End of ZGEQRT3 */ + +} /* zgeqrt3_ */ + diff --git a/lapack-netlib/SRC/zgerfs.c b/lapack-netlib/SRC/zgerfs.c new file mode 100644 index 000000000..2d4ae1c6b --- /dev/null +++ b/lapack-netlib/SRC/zgerfs.c @@ -0,0 +1,914 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGERFS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGERFS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, */ +/* X, LDX, FERR, BERR, WORK, RWORK, INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) */ +/* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGERFS improves the computed solution to a system of linear */ +/* > equations and provides error bounds and backward error estimates for */ +/* > the solution. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > The original N-by-N matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AF */ +/* > \verbatim */ +/* > AF is COMPLEX*16 array, dimension (LDAF,N) */ +/* > The factors L and U from the factorization A = P*L*U */ +/* > as computed by ZGETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from ZGETRF; for 1<=i<=N, row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > The right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension (LDX,NRHS) */ +/* > On entry, the solution matrix X, as computed by ZGETRS. */ +/* > On exit, the improved solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > ITMAX is the maximum number of steps of iterative refinement. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zgerfs_(char *trans, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, + integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, + integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, + doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1; + + /* Local variables */ + integer kase; + doublereal safe1, safe2; + integer i__, j, k; + doublereal s; + extern logical lsame_(char *, char *); + integer isave[3], count; + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), + zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, + integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *), zlacn2_(integer *, + doublecomplex *, doublecomplex *, doublereal *, integer *, + integer *); + extern doublereal dlamch_(char *); + doublereal xk; + integer nz; + doublereal safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical notran; + char transn[1], transt[1]; + doublereal lstres; + extern /* Subroutine */ int zgetrs_(char *, integer *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + integer *); + doublereal eps; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --rwork; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -10; + } else if (*ldx < f2cmax(1,*n)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGERFS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + + if (notran) { + *(unsigned char *)transn = 'N'; + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transn = 'C'; + *(unsigned char *)transt = 'N'; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = *n + 1; + eps = dlamch_("Epsilon"); + safmin = dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.; +L20: + +/* Loop until stopping criterion is satisfied. */ + +/* Compute residual R = B - op(A) * X, */ +/* where op(A) = A, A**T, or A**H, depending on TRANS. */ + + zcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[1], &c__1); + z__1.r = -1., z__1.i = 0.; + zgemv_(trans, n, n, &z__1, &a[a_offset], lda, &x[j * x_dim1 + 1], & + c__1, &c_b1, &work[1], &c__1); + +/* Compute componentwise relative backward error from formula */ + +/* f2cmax(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + rwork[i__] = (d__1 = b[i__3].r, abs(d__1)) + (d__2 = d_imag(&b[ + i__ + j * b_dim1]), abs(d__2)); +/* L30: */ + } + +/* Compute abs(op(A))*abs(X) + abs(B). */ + + if (notran) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * x_dim1; + xk = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[k + j * + x_dim1]), abs(d__2)); + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + k * a_dim1; + rwork[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = + d_imag(&a[i__ + k * a_dim1]), abs(d__2))) * xk; +/* L40: */ + } +/* L50: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + k * a_dim1; + i__5 = i__ + j * x_dim1; + s += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[ + i__ + k * a_dim1]), abs(d__2))) * ((d__3 = x[i__5] + .r, abs(d__3)) + (d__4 = d_imag(&x[i__ + j * + x_dim1]), abs(d__4))); +/* L60: */ + } + rwork[k] += s; +/* L70: */ + } + } + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (rwork[i__] > safe2) { +/* Computing MAX */ + i__3 = i__; + d__3 = s, d__4 = ((d__1 = work[i__3].r, abs(d__1)) + (d__2 = + d_imag(&work[i__]), abs(d__2))) / rwork[i__]; + s = f2cmax(d__3,d__4); + } else { +/* Computing MAX */ + i__3 = i__; + d__3 = s, d__4 = ((d__1 = work[i__3].r, abs(d__1)) + (d__2 = + d_imag(&work[i__]), abs(d__2)) + safe1) / (rwork[i__] + + safe1); + s = f2cmax(d__3,d__4); + } +/* L80: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried. */ + + if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { + +/* Update solution and try again. */ + + zgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[1], + n, info); + zaxpy_(n, &c_b1, &work[1], &c__1, &x[j * x_dim1 + 1], &c__1); + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(op(A)))* */ +/* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(op(A)) is the inverse of op(A) */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ + +/* Use ZLACN2 to estimate the infinity-norm of the matrix */ +/* inv(op(A)) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (rwork[i__] > safe2) { + i__3 = i__; + rwork[i__] = (d__1 = work[i__3].r, abs(d__1)) + (d__2 = + d_imag(&work[i__]), abs(d__2)) + nz * eps * rwork[i__] + ; + } else { + i__3 = i__; + rwork[i__] = (d__1 = work[i__3].r, abs(d__1)) + (d__2 = + d_imag(&work[i__]), abs(d__2)) + nz * eps * rwork[i__] + + safe1; + } +/* L90: */ + } + + kase = 0; +L100: + zlacn2_(n, &work[*n + 1], &work[1], &ferr[j], &kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(op(A)**H). */ + + zgetrs_(transt, n, &c__1, &af[af_offset], ldaf, &ipiv[1], & + work[1], n, info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__; + z__1.r = rwork[i__4] * work[i__5].r, z__1.i = rwork[i__4] + * work[i__5].i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; +/* L110: */ + } + } else { + +/* Multiply by inv(op(A))*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__; + z__1.r = rwork[i__4] * work[i__5].r, z__1.i = rwork[i__4] + * work[i__5].i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; +/* L120: */ + } + zgetrs_(transn, n, &c__1, &af[af_offset], ldaf, &ipiv[1], & + work[1], n, info); + } + goto L100; + } + +/* Normalize error. */ + + lstres = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + i__3 = i__ + j * x_dim1; + d__3 = lstres, d__4 = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = + d_imag(&x[i__ + j * x_dim1]), abs(d__2)); + lstres = f2cmax(d__3,d__4); +/* L130: */ + } + if (lstres != 0.) { + ferr[j] /= lstres; + } + +/* L140: */ + } + + return 0; + +/* End of ZGERFS */ + +} /* zgerfs_ */ + diff --git a/lapack-netlib/SRC/zgerfsx.c b/lapack-netlib/SRC/zgerfsx.c new file mode 100644 index 000000000..e8dddda2b --- /dev/null +++ b/lapack-netlib/SRC/zgerfsx.c @@ -0,0 +1,381 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGERQ2 computes the RQ factorization of a general rectangular matrix using an unblocked algorit +hm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGERQ2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGERQ2( M, N, A, LDA, TAU, WORK, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGERQ2 computes an RQ factorization of a complex m by n matrix A: */ +/* > A = R * Q. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the m by n matrix A. */ +/* > On exit, if m <= n, the upper triangle of the subarray */ +/* > A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; */ +/* > if m >= n, the elements on and above the (m-n)-th subdiagonal */ +/* > contain the m by n upper trapezoidal matrix R; the remaining */ +/* > elements, with the array TAU, represent the unitary matrix */ +/* > Q as a product of elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1)**H H(2)**H . . . H(k)**H, where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on */ +/* > exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zgerq2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, k; + doublecomplex alpha; + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *, ftnlen), zlarfg_(integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), zlacgv_(integer *, doublecomplex *, + integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGERQ2", &i__1, (ftnlen)6); + return 0; + } + + k = f2cmin(*m,*n); + + for (i__ = k; i__ >= 1; --i__) { + +/* Generate elementary reflector H(i) to annihilate */ +/* A(m-k+i,1:n-k+i-1) */ + + i__1 = *n - k + i__; + zlacgv_(&i__1, &a[*m - k + i__ + a_dim1], lda); + i__1 = *m - k + i__ + (*n - k + i__) * a_dim1; + alpha.r = a[i__1].r, alpha.i = a[i__1].i; + i__1 = *n - k + i__; + zlarfg_(&i__1, &alpha, &a[*m - k + i__ + a_dim1], lda, &tau[i__]); + +/* Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right */ + + i__1 = *m - k + i__ + (*n - k + i__) * a_dim1; + a[i__1].r = 1., a[i__1].i = 0.; + i__1 = *m - k + i__ - 1; + i__2 = *n - k + i__; + zlarf_("Right", &i__1, &i__2, &a[*m - k + i__ + a_dim1], lda, &tau[ + i__], &a[a_offset], lda, &work[1]); + i__1 = *m - k + i__ + (*n - k + i__) * a_dim1; + a[i__1].r = alpha.r, a[i__1].i = alpha.i; + i__1 = *n - k + i__ - 1; + zlacgv_(&i__1, &a[*m - k + i__ + a_dim1], lda); +/* L10: */ + } + return 0; + +/* End of ZGERQ2 */ + +} /* zgerq2_ */ + diff --git a/lapack-netlib/SRC/zgerqf.c b/lapack-netlib/SRC/zgerqf.c new file mode 100644 index 000000000..04cc5e1db --- /dev/null +++ b/lapack-netlib/SRC/zgerqf.c @@ -0,0 +1,713 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGERQF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGERQF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LWORK, M, N */ +/* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGERQF computes an RQ factorization of a complex M-by-N matrix A: */ +/* > A = R * Q. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if m <= n, the upper triangle of the subarray */ +/* > A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; */ +/* > if m >= n, the elements on and above the (m-n)-th subdiagonal */ +/* > contain the M-by-N upper trapezoidal matrix R; */ +/* > the remaining elements, with the array TAU, represent the */ +/* > unitary matrix Q as a product of f2cmin(m,n) elementary */ +/* > reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,M). */ +/* > For optimum performance LWORK >= M*NB, where NB is */ +/* > the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1)**H H(2)**H . . . H(k)**H, where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on */ +/* > exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zgerqf_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, nbmin, iinfo; + extern /* Subroutine */ int zgerq2_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + integer ib, nb, ki, kk, mu, nu, nx; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + integer lwkopt; + logical lquery; + integer iws; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + + if (*info == 0) { + k = f2cmin(*m,*n); + if (k == 0) { + lwkopt = 1; + } else { + nb = ilaenv_(&c__1, "ZGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *m * nb; + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + if (*lwork < f2cmax(1,*m) && ! lquery) { + *info = -7; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGERQF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (k == 0) { + return 0; + } + + nbmin = 2; + nx = 1; + iws = *m; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, "ZGERQF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "ZGERQF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially. */ +/* The last kk rows are handled by the block method. */ + + ki = (k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = k, i__2 = ki + nb; + kk = f2cmin(i__1,i__2); + + i__1 = k - kk + 1; + i__2 = -nb; + for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ + += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = f2cmin(i__3,nb); + +/* Compute the RQ factorization of the current block */ +/* A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) */ + + i__3 = *n - k + i__ + ib - 1; + zgerq2_(&ib, &i__3, &a[*m - k + i__ + a_dim1], lda, &tau[i__], & + work[1], &iinfo); + if (*m - k + i__ > 1) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . H(i+1) H(i) */ + + i__3 = *n - k + i__ + ib - 1; + zlarft_("Backward", "Rowwise", &i__3, &ib, &a[*m - k + i__ + + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right */ + + i__3 = *m - k + i__ - 1; + i__4 = *n - k + i__ + ib - 1; + zlarfb_("Right", "No transpose", "Backward", "Rowwise", &i__3, + &i__4, &ib, &a[*m - k + i__ + a_dim1], lda, &work[1], + &ldwork, &a[a_offset], lda, &work[ib + 1], &ldwork); + } +/* L10: */ + } + mu = *m - k + i__ + nb - 1; + nu = *n - k + i__ + nb - 1; + } else { + mu = *m; + nu = *n; + } + +/* Use unblocked code to factor the last or only block */ + + if (mu > 0 && nu > 0) { + zgerq2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + } + + work[1].r = (doublereal) iws, work[1].i = 0.; + return 0; + +/* End of ZGERQF */ + +} /* zgerqf_ */ + diff --git a/lapack-netlib/SRC/zgesc2.c b/lapack-netlib/SRC/zgesc2.c new file mode 100644 index 000000000..be12c4429 --- /dev/null +++ b/lapack-netlib/SRC/zgesc2.c @@ -0,0 +1,630 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGESC2 solves a system of linear equations using the LU factorization with complete pivoting co +mputed by sgetc2. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGESC2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) */ + +/* INTEGER LDA, N */ +/* DOUBLE PRECISION SCALE */ +/* INTEGER IPIV( * ), JPIV( * ) */ +/* COMPLEX*16 A( LDA, * ), RHS( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGESC2 solves a system of linear equations */ +/* > */ +/* > A * X = scale* RHS */ +/* > */ +/* > with a general N-by-N matrix A using the LU factorization with */ +/* > complete pivoting computed by ZGETC2. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the LU part of the factorization of the n-by-n */ +/* > matrix A computed by ZGETC2: A = P * L * U * Q */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1, N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] RHS */ +/* > \verbatim */ +/* > RHS is COMPLEX*16 array, dimension N. */ +/* > On entry, the right hand side vector b. */ +/* > On exit, the solution vector X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N). */ +/* > The pivot indices; for 1 <= i <= N, row i of the */ +/* > matrix has been interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JPIV */ +/* > \verbatim */ +/* > JPIV is INTEGER array, dimension (N). */ +/* > The pivot indices; for 1 <= j <= N, column j of the */ +/* > matrix has been interchanged with column JPIV(j). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is DOUBLE PRECISION */ +/* > On exit, SCALE contains the scale factor. SCALE is chosen */ +/* > 0 <= SCALE <= 1 to prevent overflow in the solution. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complex16GEauxiliary */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ +/* > Umea University, S-901 87 Umea, Sweden. */ + +/* ===================================================================== */ +/* Subroutine */ int zgesc2_(integer *n, doublecomplex *a, integer *lda, + doublecomplex *rhs, integer *ipiv, integer *jpiv, doublereal *scale) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + doublecomplex temp; + integer i__, j; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *); + doublereal bignum; + extern integer izamax_(integer *, doublecomplex *, integer *); + doublereal smlnum; + extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *, + integer *, integer *, integer *, integer *); + doublereal eps; + + +/* -- LAPACK auxiliary routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Set constant to control overflow */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --rhs; + --ipiv; + --jpiv; + + /* Function Body */ + eps = dlamch_("P"); + smlnum = dlamch_("S") / eps; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Apply permutations IPIV to RHS */ + + i__1 = *n - 1; + zlaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &ipiv[1], &c__1); + +/* Solve for L part */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + i__3 = j; + i__4 = j; + i__5 = j + i__ * a_dim1; + i__6 = i__; + z__2.r = a[i__5].r * rhs[i__6].r - a[i__5].i * rhs[i__6].i, + z__2.i = a[i__5].r * rhs[i__6].i + a[i__5].i * rhs[i__6] + .r; + z__1.r = rhs[i__4].r - z__2.r, z__1.i = rhs[i__4].i - z__2.i; + rhs[i__3].r = z__1.r, rhs[i__3].i = z__1.i; +/* L10: */ + } +/* L20: */ + } + +/* Solve for U part */ + + *scale = 1.; + +/* Check for scaling */ + + i__ = izamax_(n, &rhs[1], &c__1); + if (smlnum * 2. * z_abs(&rhs[i__]) > z_abs(&a[*n + *n * a_dim1])) { + d__1 = z_abs(&rhs[i__]); + z__1.r = .5 / d__1, z__1.i = 0. / d__1; + temp.r = z__1.r, temp.i = z__1.i; + zscal_(n, &temp, &rhs[1], &c__1); + *scale *= temp.r; + } + for (i__ = *n; i__ >= 1; --i__) { + z_div(&z__1, &c_b13, &a[i__ + i__ * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__1 = i__; + i__2 = i__; + z__1.r = rhs[i__2].r * temp.r - rhs[i__2].i * temp.i, z__1.i = rhs[ + i__2].r * temp.i + rhs[i__2].i * temp.r; + rhs[i__1].r = z__1.r, rhs[i__1].i = z__1.i; + i__1 = *n; + for (j = i__ + 1; j <= i__1; ++j) { + i__2 = i__; + i__3 = i__; + i__4 = j; + i__5 = i__ + j * a_dim1; + z__3.r = a[i__5].r * temp.r - a[i__5].i * temp.i, z__3.i = a[i__5] + .r * temp.i + a[i__5].i * temp.r; + z__2.r = rhs[i__4].r * z__3.r - rhs[i__4].i * z__3.i, z__2.i = + rhs[i__4].r * z__3.i + rhs[i__4].i * z__3.r; + z__1.r = rhs[i__3].r - z__2.r, z__1.i = rhs[i__3].i - z__2.i; + rhs[i__2].r = z__1.r, rhs[i__2].i = z__1.i; +/* L30: */ + } +/* L40: */ + } + +/* Apply permutations JPIV to the solution (RHS) */ + + i__1 = *n - 1; + zlaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &jpiv[1], &c_n1); + return 0; + +/* End of ZGESC2 */ + +} /* zgesc2_ */ + diff --git a/lapack-netlib/SRC/zgesdd.c b/lapack-netlib/SRC/zgesdd.c new file mode 100644 index 000000000..dbcc1acf5 --- /dev/null +++ b/lapack-netlib/SRC/zgesdd.c @@ -0,0 +1,2853 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGESDD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGESDD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, */ +/* WORK, LWORK, RWORK, IWORK, INFO ) */ + +/* CHARACTER JOBZ */ +/* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ), S( * ) */ +/* COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ), */ +/* $ WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGESDD computes the singular value decomposition (SVD) of a complex */ +/* > M-by-N matrix A, optionally computing the left and/or right singular */ +/* > vectors, by using divide-and-conquer method. The SVD is written */ +/* > */ +/* > A = U * SIGMA * conjugate-transpose(V) */ +/* > */ +/* > where SIGMA is an M-by-N matrix which is zero except for its */ +/* > f2cmin(m,n) diagonal elements, U is an M-by-M unitary matrix, and */ +/* > V is an N-by-N unitary matrix. The diagonal elements of SIGMA */ +/* > are the singular values of A; they are real and non-negative, and */ +/* > are returned in descending order. The first f2cmin(m,n) columns of */ +/* > U and V are the left and right singular vectors of A. */ +/* > */ +/* > Note that the routine returns VT = V**H, not V. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix U: */ +/* > = 'A': all M columns of U and all N rows of V**H are */ +/* > returned in the arrays U and VT; */ +/* > = 'S': the first f2cmin(M,N) columns of U and the first */ +/* > f2cmin(M,N) rows of V**H are returned in the arrays U */ +/* > and VT; */ +/* > = 'O': If M >= N, the first N columns of U are overwritten */ +/* > in the array A and all rows of V**H are returned in */ +/* > the array VT; */ +/* > otherwise, all columns of U are returned in the */ +/* > array U and the first M rows of V**H are overwritten */ +/* > in the array A; */ +/* > = 'N': no columns of U or rows of V**H are computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if JOBZ = 'O', A is overwritten with the first N columns */ +/* > of U (the left singular vectors, stored */ +/* > columnwise) if M >= N; */ +/* > A is overwritten with the first M rows */ +/* > of V**H (the right singular vectors, stored */ +/* > rowwise) otherwise. */ +/* > if JOBZ .ne. 'O', the contents of A are destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The singular values of A, sorted so that S(i) >= S(i+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is COMPLEX*16 array, dimension (LDU,UCOL) */ +/* > UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; */ +/* > UCOL = f2cmin(M,N) if JOBZ = 'S'. */ +/* > If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M */ +/* > unitary matrix U; */ +/* > if JOBZ = 'S', U contains the first f2cmin(M,N) columns of U */ +/* > (the left singular vectors, stored columnwise); */ +/* > if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= 1; */ +/* > if JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VT */ +/* > \verbatim */ +/* > VT is COMPLEX*16 array, dimension (LDVT,N) */ +/* > If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the */ +/* > N-by-N unitary matrix V**H; */ +/* > if JOBZ = 'S', VT contains the first f2cmin(M,N) rows of */ +/* > V**H (the right singular vectors, stored rowwise); */ +/* > if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVT */ +/* > \verbatim */ +/* > LDVT is INTEGER */ +/* > The leading dimension of the array VT. LDVT >= 1; */ +/* > if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; */ +/* > if JOBZ = 'S', LDVT >= f2cmin(M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= 1. */ +/* > If LWORK = -1, a workspace query is assumed. The optimal */ +/* > size for the WORK array is calculated and stored in WORK(1), */ +/* > and no other work except argument checking is performed. */ +/* > */ +/* > Let mx = f2cmax(M,N) and mn = f2cmin(M,N). */ +/* > If JOBZ = 'N', LWORK >= 2*mn + mx. */ +/* > If JOBZ = 'O', LWORK >= 2*mn*mn + 2*mn + mx. */ +/* > If JOBZ = 'S', LWORK >= mn*mn + 3*mn. */ +/* > If JOBZ = 'A', LWORK >= mn*mn + 2*mn + mx. */ +/* > These are not tight minimums in all cases; see comments inside code. */ +/* > For good performance, LWORK should generally be larger; */ +/* > a query is recommended. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) */ +/* > Let mx = f2cmax(M,N) and mn = f2cmin(M,N). */ +/* > If JOBZ = 'N', LRWORK >= 5*mn (LAPACK <= 3.6 needs 7*mn); */ +/* > else if mx >> mn, LRWORK >= 5*mn*mn + 5*mn; */ +/* > else LRWORK >= f2cmax( 5*mn*mn + 5*mn, */ +/* > 2*mx*mn + 2*mn*mn + mn ). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (8*f2cmin(M,N)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: The updating process of DBDSDC did not converge. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16GEsing */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Huan Ren, Computer Science Division, University of */ +/* > California at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zgesdd_(char *jobz, integer *m, integer *n, + doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u, + integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2, i__3; + + /* Local variables */ + integer lwork_zgebrd_mm__, lwork_zgebrd_mn__, lwork_zgebrd_nn__, + lwork_zgelqf_mn__, lwork_zgeqrf_mn__; + doublecomplex cdum[1]; + integer iscl; + doublereal anrm; + integer idum[1], ierr, itau, lwork_zunglq_mn__, lwork_zunglq_nn__, + lwork_zungqr_mm__, lwork_zungqr_mn__, irvt, lwork_zunmbr_prc_mm__, + lwork_zunmbr_prc_mn__, lwork_zunmbr_prc_nn__, + lwork_zunmbr_qln_mm__, lwork_zunmbr_qln_mn__, + lwork_zunmbr_qln_nn__, i__; + extern logical lsame_(char *, char *); + integer chunk, minmn; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + integer wrkbl, itaup, itauq; + logical wntqa; + integer nwork; + logical wntqn, wntqo, wntqs; + extern /* Subroutine */ int zlacp2_(char *, integer *, integer *, + doublereal *, integer *, doublecomplex *, integer *); + integer mnthr1, mnthr2, ie; + extern /* Subroutine */ int dbdsdc_(char *, char *, integer *, doublereal + *, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *); + integer il; + extern doublereal dlamch_(char *); + integer ir, iu; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + integer lwork_zungbr_p_mn__, lwork_zungbr_p_nn__, lwork_zungbr_q_mn__, + lwork_zungbr_q_mm__; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum; + extern /* Subroutine */ int zgebrd_(integer *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *, integer *); + extern logical disnan_(doublereal *); + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + extern /* Subroutine */ int zgelqf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ), zlacrm_(integer *, integer *, doublecomplex *, integer *, + doublereal *, integer *, doublecomplex *, integer *, doublereal *) + , zlarcm_(integer *, integer *, doublereal *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *), zgeqrf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ); + integer ldwrkl; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *); + integer ldwrkr, minwrk, ldwrku, maxwrk; + extern /* Subroutine */ int zungbr_(char *, integer *, integer *, integer + *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + integer ldwkvt; + doublereal smlnum; + logical wntqas; + extern /* Subroutine */ int zunmbr_(char *, char *, char *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ), zunglq_(integer *, integer *, integer * + , doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + logical lquery; + integer nrwork; + extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + integer blk; + doublereal dum[1], eps; + integer iru, ivt; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1 * 1; + vt -= vt_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + minmn = f2cmin(*m,*n); + mnthr1 = (integer) (minmn * 17. / 9.); + mnthr2 = (integer) (minmn * 5. / 3.); + wntqa = lsame_(jobz, "A"); + wntqs = lsame_(jobz, "S"); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, "O"); + wntqn = lsame_(jobz, "N"); + lquery = *lwork == -1; + minwrk = 1; + maxwrk = 1; + + if (! (wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + +/* Compute workspace */ +/* Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace allocated at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* CWorkspace refers to complex workspace, and RWorkspace to */ +/* real workspace. NB refers to the optimal block size for the */ +/* immediately following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (*m >= *n && minmn > 0) { + +/* There is no complex work space needed for bidiagonal SVD */ +/* The real work space needed for bidiagonal SVD (dbdsdc) is */ +/* BDSPAC = 3*N*N + 4*N for singular values and vectors; */ +/* BDSPAC = 4*N for singular values only; */ +/* not including e, RU, and RVT matrices. */ + +/* Compute space preferred for each routine */ + zgebrd_(m, n, cdum, m, dum, dum, cdum, cdum, cdum, &c_n1, &ierr); + lwork_zgebrd_mn__ = (integer) cdum[0].r; + + zgebrd_(n, n, cdum, n, dum, dum, cdum, cdum, cdum, &c_n1, &ierr); + lwork_zgebrd_nn__ = (integer) cdum[0].r; + + zgeqrf_(m, n, cdum, m, cdum, cdum, &c_n1, &ierr); + lwork_zgeqrf_mn__ = (integer) cdum[0].r; + + zungbr_("P", n, n, n, cdum, n, cdum, cdum, &c_n1, &ierr); + lwork_zungbr_p_nn__ = (integer) cdum[0].r; + + zungbr_("Q", m, m, n, cdum, m, cdum, cdum, &c_n1, &ierr); + lwork_zungbr_q_mm__ = (integer) cdum[0].r; + + zungbr_("Q", m, n, n, cdum, m, cdum, cdum, &c_n1, &ierr); + lwork_zungbr_q_mn__ = (integer) cdum[0].r; + + zungqr_(m, m, n, cdum, m, cdum, cdum, &c_n1, &ierr); + lwork_zungqr_mm__ = (integer) cdum[0].r; + + zungqr_(m, n, n, cdum, m, cdum, cdum, &c_n1, &ierr); + lwork_zungqr_mn__ = (integer) cdum[0].r; + + zunmbr_("P", "R", "C", n, n, n, cdum, n, cdum, cdum, n, cdum, & + c_n1, &ierr); + lwork_zunmbr_prc_nn__ = (integer) cdum[0].r; + + zunmbr_("Q", "L", "N", m, m, n, cdum, m, cdum, cdum, m, cdum, & + c_n1, &ierr); + lwork_zunmbr_qln_mm__ = (integer) cdum[0].r; + + zunmbr_("Q", "L", "N", m, n, n, cdum, m, cdum, cdum, m, cdum, & + c_n1, &ierr); + lwork_zunmbr_qln_mn__ = (integer) cdum[0].r; + + zunmbr_("Q", "L", "N", n, n, n, cdum, n, cdum, cdum, n, cdum, & + c_n1, &ierr); + lwork_zunmbr_qln_nn__ = (integer) cdum[0].r; + + if (*m >= mnthr1) { + if (wntqn) { + +/* Path 1 (M >> N, JOBZ='N') */ + + maxwrk = *n + lwork_zgeqrf_mn__; +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + lwork_zgebrd_nn__; + maxwrk = f2cmax(i__1,i__2); + minwrk = *n * 3; + } else if (wntqo) { + +/* Path 2 (M >> N, JOBZ='O') */ + + wrkbl = *n + lwork_zgeqrf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + lwork_zungqr_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + lwork_zgebrd_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + lwork_zunmbr_qln_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + lwork_zunmbr_prc_nn__; + wrkbl = f2cmax(i__1,i__2); + maxwrk = *m * *n + *n * *n + wrkbl; + minwrk = (*n << 1) * *n + *n * 3; + } else if (wntqs) { + +/* Path 3 (M >> N, JOBZ='S') */ + + wrkbl = *n + lwork_zgeqrf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + lwork_zungqr_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + lwork_zgebrd_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + lwork_zunmbr_qln_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + lwork_zunmbr_prc_nn__; + wrkbl = f2cmax(i__1,i__2); + maxwrk = *n * *n + wrkbl; + minwrk = *n * *n + *n * 3; + } else if (wntqa) { + +/* Path 4 (M >> N, JOBZ='A') */ + + wrkbl = *n + lwork_zgeqrf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + lwork_zungqr_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + lwork_zgebrd_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + lwork_zunmbr_qln_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + lwork_zunmbr_prc_nn__; + wrkbl = f2cmax(i__1,i__2); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__1 = *n * 3, i__2 = *n + *m; + minwrk = *n * *n + f2cmax(i__1,i__2); + } + } else if (*m >= mnthr2) { + +/* Path 5 (M >> N, but not as much as MNTHR1) */ + + maxwrk = (*n << 1) + lwork_zgebrd_mn__; + minwrk = (*n << 1) + *m; + if (wntqo) { +/* Path 5o (M >> N, JOBZ='O') */ +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + lwork_zungbr_p_nn__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + lwork_zungbr_q_mn__; + maxwrk = f2cmax(i__1,i__2); + maxwrk += *m * *n; + minwrk += *n * *n; + } else if (wntqs) { +/* Path 5s (M >> N, JOBZ='S') */ +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + lwork_zungbr_p_nn__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + lwork_zungbr_q_mn__; + maxwrk = f2cmax(i__1,i__2); + } else if (wntqa) { +/* Path 5a (M >> N, JOBZ='A') */ +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + lwork_zungbr_p_nn__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + lwork_zungbr_q_mm__; + maxwrk = f2cmax(i__1,i__2); + } + } else { + +/* Path 6 (M >= N, but not much larger) */ + + maxwrk = (*n << 1) + lwork_zgebrd_mn__; + minwrk = (*n << 1) + *m; + if (wntqo) { +/* Path 6o (M >= N, JOBZ='O') */ +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + lwork_zunmbr_prc_nn__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + lwork_zunmbr_qln_mn__; + maxwrk = f2cmax(i__1,i__2); + maxwrk += *m * *n; + minwrk += *n * *n; + } else if (wntqs) { +/* Path 6s (M >= N, JOBZ='S') */ +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + lwork_zunmbr_qln_mn__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + lwork_zunmbr_prc_nn__; + maxwrk = f2cmax(i__1,i__2); + } else if (wntqa) { +/* Path 6a (M >= N, JOBZ='A') */ +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + lwork_zunmbr_qln_mm__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + lwork_zunmbr_prc_nn__; + maxwrk = f2cmax(i__1,i__2); + } + } + } else if (minmn > 0) { + +/* There is no complex work space needed for bidiagonal SVD */ +/* The real work space needed for bidiagonal SVD (dbdsdc) is */ +/* BDSPAC = 3*M*M + 4*M for singular values and vectors; */ +/* BDSPAC = 4*M for singular values only; */ +/* not including e, RU, and RVT matrices. */ + +/* Compute space preferred for each routine */ + zgebrd_(m, n, cdum, m, dum, dum, cdum, cdum, cdum, &c_n1, &ierr); + lwork_zgebrd_mn__ = (integer) cdum[0].r; + + zgebrd_(m, m, cdum, m, dum, dum, cdum, cdum, cdum, &c_n1, &ierr); + lwork_zgebrd_mm__ = (integer) cdum[0].r; + + zgelqf_(m, n, cdum, m, cdum, cdum, &c_n1, &ierr); + lwork_zgelqf_mn__ = (integer) cdum[0].r; + + zungbr_("P", m, n, m, cdum, m, cdum, cdum, &c_n1, &ierr); + lwork_zungbr_p_mn__ = (integer) cdum[0].r; + + zungbr_("P", n, n, m, cdum, n, cdum, cdum, &c_n1, &ierr); + lwork_zungbr_p_nn__ = (integer) cdum[0].r; + + zungbr_("Q", m, m, n, cdum, m, cdum, cdum, &c_n1, &ierr); + lwork_zungbr_q_mm__ = (integer) cdum[0].r; + + zunglq_(m, n, m, cdum, m, cdum, cdum, &c_n1, &ierr); + lwork_zunglq_mn__ = (integer) cdum[0].r; + + zunglq_(n, n, m, cdum, n, cdum, cdum, &c_n1, &ierr); + lwork_zunglq_nn__ = (integer) cdum[0].r; + + zunmbr_("P", "R", "C", m, m, m, cdum, m, cdum, cdum, m, cdum, & + c_n1, &ierr); + lwork_zunmbr_prc_mm__ = (integer) cdum[0].r; + + zunmbr_("P", "R", "C", m, n, m, cdum, m, cdum, cdum, m, cdum, & + c_n1, &ierr); + lwork_zunmbr_prc_mn__ = (integer) cdum[0].r; + + zunmbr_("P", "R", "C", n, n, m, cdum, n, cdum, cdum, n, cdum, & + c_n1, &ierr); + lwork_zunmbr_prc_nn__ = (integer) cdum[0].r; + + zunmbr_("Q", "L", "N", m, m, m, cdum, m, cdum, cdum, m, cdum, & + c_n1, &ierr); + lwork_zunmbr_qln_mm__ = (integer) cdum[0].r; + + if (*n >= mnthr1) { + if (wntqn) { + +/* Path 1t (N >> M, JOBZ='N') */ + + maxwrk = *m + lwork_zgelqf_mn__; +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + lwork_zgebrd_mm__; + maxwrk = f2cmax(i__1,i__2); + minwrk = *m * 3; + } else if (wntqo) { + +/* Path 2t (N >> M, JOBZ='O') */ + + wrkbl = *m + lwork_zgelqf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + lwork_zunglq_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + lwork_zgebrd_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + lwork_zunmbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + lwork_zunmbr_prc_mm__; + wrkbl = f2cmax(i__1,i__2); + maxwrk = *m * *n + *m * *m + wrkbl; + minwrk = (*m << 1) * *m + *m * 3; + } else if (wntqs) { + +/* Path 3t (N >> M, JOBZ='S') */ + + wrkbl = *m + lwork_zgelqf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + lwork_zunglq_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + lwork_zgebrd_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + lwork_zunmbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + lwork_zunmbr_prc_mm__; + wrkbl = f2cmax(i__1,i__2); + maxwrk = *m * *m + wrkbl; + minwrk = *m * *m + *m * 3; + } else if (wntqa) { + +/* Path 4t (N >> M, JOBZ='A') */ + + wrkbl = *m + lwork_zgelqf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + lwork_zunglq_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + lwork_zgebrd_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + lwork_zunmbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + lwork_zunmbr_prc_mm__; + wrkbl = f2cmax(i__1,i__2); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__1 = *m * 3, i__2 = *m + *n; + minwrk = *m * *m + f2cmax(i__1,i__2); + } + } else if (*n >= mnthr2) { + +/* Path 5t (N >> M, but not as much as MNTHR1) */ + + maxwrk = (*m << 1) + lwork_zgebrd_mn__; + minwrk = (*m << 1) + *n; + if (wntqo) { +/* Path 5to (N >> M, JOBZ='O') */ +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + lwork_zungbr_q_mm__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + lwork_zungbr_p_mn__; + maxwrk = f2cmax(i__1,i__2); + maxwrk += *m * *n; + minwrk += *m * *m; + } else if (wntqs) { +/* Path 5ts (N >> M, JOBZ='S') */ +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + lwork_zungbr_q_mm__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + lwork_zungbr_p_mn__; + maxwrk = f2cmax(i__1,i__2); + } else if (wntqa) { +/* Path 5ta (N >> M, JOBZ='A') */ +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + lwork_zungbr_q_mm__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + lwork_zungbr_p_nn__; + maxwrk = f2cmax(i__1,i__2); + } + } else { + +/* Path 6t (N > M, but not much larger) */ + + maxwrk = (*m << 1) + lwork_zgebrd_mn__; + minwrk = (*m << 1) + *n; + if (wntqo) { +/* Path 6to (N > M, JOBZ='O') */ +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + lwork_zunmbr_qln_mm__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + lwork_zunmbr_prc_mn__; + maxwrk = f2cmax(i__1,i__2); + maxwrk += *m * *n; + minwrk += *m * *m; + } else if (wntqs) { +/* Path 6ts (N > M, JOBZ='S') */ +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + lwork_zunmbr_qln_mm__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + lwork_zunmbr_prc_mn__; + maxwrk = f2cmax(i__1,i__2); + } else if (wntqa) { +/* Path 6ta (N > M, JOBZ='A') */ +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + lwork_zunmbr_qln_mm__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + lwork_zunmbr_prc_nn__; + maxwrk = f2cmax(i__1,i__2); + } + } + } + maxwrk = f2cmax(maxwrk,minwrk); + } + if (*info == 0) { + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGESDD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = dlamch_("P"); + smlnum = sqrt(dlamch_("S")) / eps; + bignum = 1. / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = zlange_("M", m, n, &a[a_offset], lda, dum); + if (disnan_(&anrm)) { + *info = -4; + return 0; + } + iscl = 0; + if (anrm > 0. && anrm < smlnum) { + iscl = 1; + zlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + zlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* A has at least as many rows as columns. If A has sufficiently */ +/* more rows than columns, first reduce using the QR */ +/* decomposition (if sufficient workspace available) */ + + if (*m >= mnthr1) { + + if (wntqn) { + +/* Path 1 (M >> N, JOBZ='N') */ +/* No singular vectors to be computed */ + + itau = 1; + nwork = itau + *n; + +/* Compute A=Q*R */ +/* CWorkspace: need N [tau] + N [work] */ +/* CWorkspace: prefer N [tau] + N*NB [work] */ +/* RWorkspace: need 0 */ + + i__1 = *lwork - nwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out below R */ + + i__1 = *n - 1; + i__2 = *n - 1; + zlaset_("L", &i__1, &i__2, &c_b1, &c_b1, &a[a_dim1 + 2], lda); + ie = 1; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* CWorkspace: need 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer 2*N [tauq, taup] + 2*N*NB [work] */ +/* RWorkspace: need N [e] */ + + i__1 = *lwork - nwork + 1; + zgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nrwork = ie + *n; + +/* Perform bidiagonal SVD, compute singular values only */ +/* CWorkspace: need 0 */ +/* RWorkspace: need N [e] + BDSPAC */ + + dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + + } else if (wntqo) { + +/* Path 2 (M >> N, JOBZ='O') */ +/* N left singular vectors to be overwritten on A and */ +/* N right singular vectors to be computed in VT */ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + ir = iu + ldwrku * *n; + if (*lwork >= *m * *n + *n * *n + *n * 3) { + +/* WORK(IR) is M by N */ + + ldwrkr = *m; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* Compute A=Q*R */ +/* CWorkspace: need N*N [U] + N*N [R] + N [tau] + N [work] */ +/* CWorkspace: prefer N*N [U] + N*N [R] + N [tau] + N*NB [work] */ +/* RWorkspace: need 0 */ + + i__1 = *lwork - nwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy R to WORK( IR ), zeroing out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__1 = *n - 1; + i__2 = *n - 1; + zlaset_("L", &i__1, &i__2, &c_b1, &c_b1, &work[ir + 1], & + ldwrkr); + +/* Generate Q in A */ +/* CWorkspace: need N*N [U] + N*N [R] + N [tau] + N [work] */ +/* CWorkspace: prefer N*N [U] + N*N [R] + N [tau] + N*NB [work] */ +/* RWorkspace: need 0 */ + + i__1 = *lwork - nwork + 1; + zungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* CWorkspace: need N*N [U] + N*N [R] + 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + 2*N*NB [work] */ +/* RWorkspace: need N [e] */ + + i__1 = *lwork - nwork + 1; + zgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of R in WORK(IRU) and computing right singular vectors */ +/* of R in WORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC */ + + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) */ +/* Overwrite WORK(IU) by the left singular vectors of R */ +/* CWorkspace: need N*N [U] + N*N [R] + 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + N*NB [work] */ +/* RWorkspace: need 0 */ + + zlacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & + ierr); + +/* Copy real matrix RWORK(IRVT) to complex matrix VT */ +/* Overwrite VT by the right singular vectors of R */ +/* CWorkspace: need N*N [U] + N*N [R] + 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + N*NB [work] */ +/* RWorkspace: need 0 */ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IU), storing result in WORK(IR) and copying to A */ +/* CWorkspace: need N*N [U] + N*N [R] */ +/* CWorkspace: prefer N*N [U] + M*N [R] */ +/* RWorkspace: need 0 */ + + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = f2cmin(i__3,ldwrkr); + zgemm_("N", "N", &chunk, n, n, &c_b2, &a[i__ + a_dim1], + lda, &work[iu], &ldwrku, &c_b1, &work[ir], & + ldwrkr); + zlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else if (wntqs) { + +/* Path 3 (M >> N, JOBZ='S') */ +/* N left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + + ir = 1; + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* Compute A=Q*R */ +/* CWorkspace: need N*N [R] + N [tau] + N [work] */ +/* CWorkspace: prefer N*N [R] + N [tau] + N*NB [work] */ +/* RWorkspace: need 0 */ + + i__2 = *lwork - nwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__1 = *n - 1; + zlaset_("L", &i__2, &i__1, &c_b1, &c_b1, &work[ir + 1], & + ldwrkr); + +/* Generate Q in A */ +/* CWorkspace: need N*N [R] + N [tau] + N [work] */ +/* CWorkspace: prefer N*N [R] + N [tau] + N*NB [work] */ +/* RWorkspace: need 0 */ + + i__2 = *lwork - nwork + 1; + zungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* CWorkspace: need N*N [R] + 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + 2*N*NB [work] */ +/* RWorkspace: need N [e] */ + + i__2 = *lwork - nwork + 1; + zgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC */ + + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Copy real matrix RWORK(IRU) to complex matrix U */ +/* Overwrite U by left singular vectors of R */ +/* CWorkspace: need N*N [R] + 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + N*NB [work] */ +/* RWorkspace: need 0 */ + + zlacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* Copy real matrix RWORK(IRVT) to complex matrix VT */ +/* Overwrite VT by right singular vectors of R */ +/* CWorkspace: need N*N [R] + 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + N*NB [work] */ +/* RWorkspace: need 0 */ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in U */ +/* CWorkspace: need N*N [R] */ +/* RWorkspace: need 0 */ + + zlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); + zgemm_("N", "N", m, n, n, &c_b2, &a[a_offset], lda, &work[ir], + &ldwrkr, &c_b1, &u[u_offset], ldu); + + } else if (wntqa) { + +/* Path 4 (M >> N, JOBZ='A') */ +/* M left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* CWorkspace: need N*N [U] + N [tau] + N [work] */ +/* CWorkspace: prefer N*N [U] + N [tau] + N*NB [work] */ +/* RWorkspace: need 0 */ + + i__2 = *lwork - nwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + +/* Generate Q in U */ +/* CWorkspace: need N*N [U] + N [tau] + M [work] */ +/* CWorkspace: prefer N*N [U] + N [tau] + M*NB [work] */ +/* RWorkspace: need 0 */ + + i__2 = *lwork - nwork + 1; + zungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], + &i__2, &ierr); + +/* Produce R in A, zeroing out below it */ + + i__2 = *n - 1; + i__1 = *n - 1; + zlaset_("L", &i__2, &i__1, &c_b1, &c_b1, &a[a_dim1 + 2], lda); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* CWorkspace: need N*N [U] + 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + 2*N*NB [work] */ +/* RWorkspace: need N [e] */ + + i__2 = *lwork - nwork + 1; + zgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC */ + + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) */ +/* Overwrite WORK(IU) by left singular vectors of R */ +/* CWorkspace: need N*N [U] + 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + N*NB [work] */ +/* RWorkspace: need 0 */ + + zlacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + +/* Copy real matrix RWORK(IRVT) to complex matrix VT */ +/* Overwrite VT by right singular vectors of R */ +/* CWorkspace: need N*N [U] + 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + N*NB [work] */ +/* RWorkspace: need 0 */ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IU), storing result in A */ +/* CWorkspace: need N*N [U] */ +/* RWorkspace: need 0 */ + + zgemm_("N", "N", m, n, n, &c_b2, &u[u_offset], ldu, &work[iu], + &ldwrku, &c_b1, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + + } + + } else if (*m >= mnthr2) { + +/* MNTHR2 <= M < MNTHR1 */ + +/* Path 5 (M >> N, but not as much as MNTHR1) */ +/* Reduce to bidiagonal form without QR decomposition, use */ +/* ZUNGBR and matrix multiplication to compute singular vectors */ + + ie = 1; + nrwork = ie + *n; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize A */ +/* CWorkspace: need 2*N [tauq, taup] + M [work] */ +/* CWorkspace: prefer 2*N [tauq, taup] + (M+N)*NB [work] */ +/* RWorkspace: need N [e] */ + + i__2 = *lwork - nwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* Path 5n (M >> N, JOBZ='N') */ +/* Compute singular values only */ +/* CWorkspace: need 0 */ +/* RWorkspace: need N [e] + BDSPAC */ + + dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + +/* Path 5o (M >> N, JOBZ='O') */ +/* Copy A to VT, generate P**H */ +/* CWorkspace: need 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] */ +/* RWorkspace: need 0 */ + + zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* Generate Q in A */ +/* CWorkspace: need 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] */ +/* RWorkspace: need 0 */ + + i__2 = *lwork - nwork + 1; + zungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[ + nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *n * 3) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + } else { + +/* WORK(IU) is LDWRKU by N */ + + ldwrku = (*lwork - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC */ + + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Multiply real matrix RWORK(IRVT) by P**H in VT, */ +/* storing the result in WORK(IU), copying to VT */ +/* CWorkspace: need 2*N [tauq, taup] + N*N [U] */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork] */ + + zlarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &work[iu] + , &ldwrku, &rwork[nrwork]); + zlacpy_("F", n, n, &work[iu], &ldwrku, &vt[vt_offset], ldvt); + +/* Multiply Q in A by real matrix RWORK(IRU), storing the */ +/* result in WORK(IU), copying to A */ +/* CWorkspace: need 2*N [tauq, taup] + N*N [U] */ +/* CWorkspace: prefer 2*N [tauq, taup] + M*N [U] */ +/* RWorkspace: need N [e] + N*N [RU] + 2*N*N [rwork] */ +/* RWorkspace: prefer N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here */ + + nrwork = irvt; + i__2 = *m; + i__1 = ldwrku; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = f2cmin(i__3,ldwrku); + zlacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], n, + &work[iu], &ldwrku, &rwork[nrwork]); + zlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L20: */ + } + + } else if (wntqs) { + +/* Path 5s (M >> N, JOBZ='S') */ +/* Copy A to VT, generate P**H */ +/* CWorkspace: need 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] */ +/* RWorkspace: need 0 */ + + zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__1, &ierr); + +/* Copy A to U, generate Q */ +/* CWorkspace: need 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] */ +/* RWorkspace: need 0 */ + + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zungbr_("Q", m, n, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC */ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Multiply real matrix RWORK(IRVT) by P**H in VT, */ +/* storing the result in A, copying to VT */ +/* CWorkspace: need 0 */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork] */ + + zlarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + zlacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* Multiply Q in U by real matrix RWORK(IRU), storing the */ +/* result in A, copying to U */ +/* CWorkspace: need 0 */ +/* RWorkspace: need N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here */ + + nrwork = irvt; + zlacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], + lda, &rwork[nrwork]); + zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + } else { + +/* Path 5a (M >> N, JOBZ='A') */ +/* Copy A to VT, generate P**H */ +/* CWorkspace: need 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] */ +/* RWorkspace: need 0 */ + + zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__1, &ierr); + +/* Copy A to U, generate Q */ +/* CWorkspace: need 2*N [tauq, taup] + M [work] */ +/* CWorkspace: prefer 2*N [tauq, taup] + M*NB [work] */ +/* RWorkspace: need 0 */ + + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC */ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Multiply real matrix RWORK(IRVT) by P**H in VT, */ +/* storing the result in A, copying to VT */ +/* CWorkspace: need 0 */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork] */ + + zlarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + zlacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* Multiply Q in U by real matrix RWORK(IRU), storing the */ +/* result in A, copying to U */ +/* CWorkspace: need 0 */ +/* RWorkspace: need N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here */ + + nrwork = irvt; + zlacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], + lda, &rwork[nrwork]); + zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + } + + } else { + +/* M .LT. MNTHR2 */ + +/* Path 6 (M >= N, but not much larger) */ +/* Reduce to bidiagonal form without QR decomposition */ +/* Use ZUNMBR to compute singular vectors */ + + ie = 1; + nrwork = ie + *n; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize A */ +/* CWorkspace: need 2*N [tauq, taup] + M [work] */ +/* CWorkspace: prefer 2*N [tauq, taup] + (M+N)*NB [work] */ +/* RWorkspace: need N [e] */ + + i__1 = *lwork - nwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, &ierr); + if (wntqn) { + +/* Path 6n (M >= N, JOBZ='N') */ +/* Compute singular values only */ +/* CWorkspace: need 0 */ +/* RWorkspace: need N [e] + BDSPAC */ + + dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + if (*lwork >= *m * *n + *n * 3) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + } else { + +/* WORK( IU ) is LDWRKU by N */ + + ldwrku = (*lwork - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* Path 6o (M >= N, JOBZ='O') */ +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC */ + + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Copy real matrix RWORK(IRVT) to complex matrix VT */ +/* Overwrite VT by right singular vectors of A */ +/* CWorkspace: need 2*N [tauq, taup] + N*N [U] + N [work] */ +/* CWorkspace: prefer 2*N [tauq, taup] + N*N [U] + N*NB [work] */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] */ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + + if (*lwork >= *m * *n + *n * 3) { + +/* Path 6o-fast */ +/* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) */ +/* Overwrite WORK(IU) by left singular vectors of A, copying */ +/* to A */ +/* CWorkspace: need 2*N [tauq, taup] + M*N [U] + N [work] */ +/* CWorkspace: prefer 2*N [tauq, taup] + M*N [U] + N*NB [work] */ +/* RWorkspace: need N [e] + N*N [RU] */ + + zlaset_("F", m, n, &c_b1, &c_b1, &work[iu], &ldwrku); + zlacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & + ierr); + zlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); + } else { + +/* Path 6o-slow */ +/* Generate Q in A */ +/* CWorkspace: need 2*N [tauq, taup] + N*N [U] + N [work] */ +/* CWorkspace: prefer 2*N [tauq, taup] + N*N [U] + N*NB [work] */ +/* RWorkspace: need 0 */ + + i__1 = *lwork - nwork + 1; + zungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[nwork], &i__1, &ierr); + +/* Multiply Q in A by real matrix RWORK(IRU), storing the */ +/* result in WORK(IU), copying to A */ +/* CWorkspace: need 2*N [tauq, taup] + N*N [U] */ +/* CWorkspace: prefer 2*N [tauq, taup] + M*N [U] */ +/* RWorkspace: need N [e] + N*N [RU] + 2*N*N [rwork] */ +/* RWorkspace: prefer N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here */ + + nrwork = irvt; + i__1 = *m; + i__2 = ldwrku; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = f2cmin(i__3,ldwrku); + zlacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], + n, &work[iu], &ldwrku, &rwork[nrwork]); + zlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L30: */ + } + } + + } else if (wntqs) { + +/* Path 6s (M >= N, JOBZ='S') */ +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC */ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Copy real matrix RWORK(IRU) to complex matrix U */ +/* Overwrite U by left singular vectors of A */ +/* CWorkspace: need 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] */ + + zlaset_("F", m, n, &c_b1, &c_b1, &u[u_offset], ldu) + ; + zlacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* Copy real matrix RWORK(IRVT) to complex matrix VT */ +/* Overwrite VT by right singular vectors of A */ +/* CWorkspace: need 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] */ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + } else { + +/* Path 6a (M >= N, JOBZ='A') */ +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC */ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Set the right corner of U to identity matrix */ + + zlaset_("F", m, m, &c_b1, &c_b1, &u[u_offset], ldu) + ; + if (*m > *n) { + i__2 = *m - *n; + i__1 = *m - *n; + zlaset_("F", &i__2, &i__1, &c_b1, &c_b2, &u[*n + 1 + (*n + + 1) * u_dim1], ldu); + } + +/* Copy real matrix RWORK(IRU) to complex matrix U */ +/* Overwrite U by left singular vectors of A */ +/* CWorkspace: need 2*N [tauq, taup] + M [work] */ +/* CWorkspace: prefer 2*N [tauq, taup] + M*NB [work] */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] */ + + zlacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* Copy real matrix RWORK(IRVT) to complex matrix VT */ +/* Overwrite VT by right singular vectors of A */ +/* CWorkspace: need 2*N [tauq, taup] + N [work] */ +/* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] */ +/* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] */ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + } + + } + + } else { + +/* A has more columns than rows. If A has sufficiently more */ +/* columns than rows, first reduce using the LQ decomposition (if */ +/* sufficient workspace available) */ + + if (*n >= mnthr1) { + + if (wntqn) { + +/* Path 1t (N >> M, JOBZ='N') */ +/* No singular vectors to be computed */ + + itau = 1; + nwork = itau + *m; + +/* Compute A=L*Q */ +/* CWorkspace: need M [tau] + M [work] */ +/* CWorkspace: prefer M [tau] + M*NB [work] */ +/* RWorkspace: need 0 */ + + i__2 = *lwork - nwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Zero out above L */ + + i__2 = *m - 1; + i__1 = *m - 1; + zlaset_("U", &i__2, &i__1, &c_b1, &c_b1, &a[(a_dim1 << 1) + 1] + , lda); + ie = 1; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in A */ +/* CWorkspace: need 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer 2*M [tauq, taup] + 2*M*NB [work] */ +/* RWorkspace: need M [e] */ + + i__2 = *lwork - nwork + 1; + zgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + nrwork = ie + *m; + +/* Perform bidiagonal SVD, compute singular values only */ +/* CWorkspace: need 0 */ +/* RWorkspace: need M [e] + BDSPAC */ + + dbdsdc_("U", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + + } else if (wntqo) { + +/* Path 2t (N >> M, JOBZ='O') */ +/* M right singular vectors to be overwritten on A and */ +/* M left singular vectors to be computed in U */ + + ivt = 1; + ldwkvt = *m; + +/* WORK(IVT) is M by M */ + + il = ivt + ldwkvt * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3) { + +/* WORK(IL) M by N */ + + ldwrkl = *m; + chunk = *n; + } else { + +/* WORK(IL) is M by CHUNK */ + + ldwrkl = *m; + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + itau = il + ldwrkl * chunk; + nwork = itau + *m; + +/* Compute A=L*Q */ +/* CWorkspace: need M*M [VT] + M*M [L] + M [tau] + M [work] */ +/* CWorkspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] */ +/* RWorkspace: need 0 */ + + i__2 = *lwork - nwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy L to WORK(IL), zeroing about above it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__2 = *m - 1; + i__1 = *m - 1; + zlaset_("U", &i__2, &i__1, &c_b1, &c_b1, &work[il + ldwrkl], & + ldwrkl); + +/* Generate Q in A */ +/* CWorkspace: need M*M [VT] + M*M [L] + M [tau] + M [work] */ +/* CWorkspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] */ +/* RWorkspace: need 0 */ + + i__2 = *lwork - nwork + 1; + zunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in WORK(IL) */ +/* CWorkspace: need M*M [VT] + M*M [L] + 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + 2*M*NB [work] */ +/* RWorkspace: need M [e] */ + + i__2 = *lwork - nwork + 1; + zgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need M [e] + M*M [RU] + M*M [RVT] + BDSPAC */ + + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + dbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) */ +/* Overwrite WORK(IU) by the left singular vectors of L */ +/* CWorkspace: need M*M [VT] + M*M [L] + 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + M*NB [work] */ +/* RWorkspace: need 0 */ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) */ +/* Overwrite WORK(IVT) by the right singular vectors of L */ +/* CWorkspace: need M*M [VT] + M*M [L] + 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + M*NB [work] */ +/* RWorkspace: need 0 */ + + zlacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & + ierr); + +/* Multiply right singular vectors of L in WORK(IL) by Q */ +/* in A, storing result in WORK(IL) and copying to A */ +/* CWorkspace: need M*M [VT] + M*M [L] */ +/* CWorkspace: prefer M*M [VT] + M*N [L] */ +/* RWorkspace: need 0 */ + + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = f2cmin(i__3,chunk); + zgemm_("N", "N", m, &blk, m, &c_b2, &work[ivt], m, &a[i__ + * a_dim1 + 1], lda, &c_b1, &work[il], &ldwrkl); + zlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + + 1], lda); +/* L40: */ + } + + } else if (wntqs) { + +/* Path 3t (N >> M, JOBZ='S') */ +/* M right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + + il = 1; + +/* WORK(IL) is M by M */ + + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* Compute A=L*Q */ +/* CWorkspace: need M*M [L] + M [tau] + M [work] */ +/* CWorkspace: prefer M*M [L] + M [tau] + M*NB [work] */ +/* RWorkspace: need 0 */ + + i__1 = *lwork - nwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy L to WORK(IL), zeroing out above it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__1 = *m - 1; + i__2 = *m - 1; + zlaset_("U", &i__1, &i__2, &c_b1, &c_b1, &work[il + ldwrkl], & + ldwrkl); + +/* Generate Q in A */ +/* CWorkspace: need M*M [L] + M [tau] + M [work] */ +/* CWorkspace: prefer M*M [L] + M [tau] + M*NB [work] */ +/* RWorkspace: need 0 */ + + i__1 = *lwork - nwork + 1; + zunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in WORK(IL) */ +/* CWorkspace: need M*M [L] + 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + 2*M*NB [work] */ +/* RWorkspace: need M [e] */ + + i__1 = *lwork - nwork + 1; + zgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need M [e] + M*M [RU] + M*M [RVT] + BDSPAC */ + + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + dbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Copy real matrix RWORK(IRU) to complex matrix U */ +/* Overwrite U by left singular vectors of L */ +/* CWorkspace: need M*M [L] + 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + M*NB [work] */ +/* RWorkspace: need 0 */ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* Copy real matrix RWORK(IRVT) to complex matrix VT */ +/* Overwrite VT by left singular vectors of L */ +/* CWorkspace: need M*M [L] + 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + M*NB [work] */ +/* RWorkspace: need 0 */ + + zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* Copy VT to WORK(IL), multiply right singular vectors of L */ +/* in WORK(IL) by Q in A, storing result in VT */ +/* CWorkspace: need M*M [L] */ +/* RWorkspace: need 0 */ + + zlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); + zgemm_("N", "N", m, n, m, &c_b2, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b1, &vt[vt_offset], ldvt); + + } else if (wntqa) { + +/* Path 4t (N >> M, JOBZ='A') */ +/* N right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + + ivt = 1; + +/* WORK(IVT) is M by M */ + + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* CWorkspace: need M*M [VT] + M [tau] + M [work] */ +/* CWorkspace: prefer M*M [VT] + M [tau] + M*NB [work] */ +/* RWorkspace: need 0 */ + + i__1 = *lwork - nwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* Generate Q in VT */ +/* CWorkspace: need M*M [VT] + M [tau] + N [work] */ +/* CWorkspace: prefer M*M [VT] + M [tau] + N*NB [work] */ +/* RWorkspace: need 0 */ + + i__1 = *lwork - nwork + 1; + zunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ + nwork], &i__1, &ierr); + +/* Produce L in A, zeroing out above it */ + + i__1 = *m - 1; + i__2 = *m - 1; + zlaset_("U", &i__1, &i__2, &c_b1, &c_b1, &a[(a_dim1 << 1) + 1] + , lda); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in A */ +/* CWorkspace: need M*M [VT] + 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + 2*M*NB [work] */ +/* RWorkspace: need M [e] */ + + i__1 = *lwork - nwork + 1; + zgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need M [e] + M*M [RU] + M*M [RVT] + BDSPAC */ + + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + dbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Copy real matrix RWORK(IRU) to complex matrix U */ +/* Overwrite U by left singular vectors of L */ +/* CWorkspace: need M*M [VT] + 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + M*NB [work] */ +/* RWorkspace: need 0 */ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) */ +/* Overwrite WORK(IVT) by right singular vectors of L */ +/* CWorkspace: need M*M [VT] + 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + M*NB [work] */ +/* RWorkspace: need 0 */ + + zlacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", m, m, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__1, & + ierr); + +/* Multiply right singular vectors of L in WORK(IVT) by */ +/* Q in VT, storing result in A */ +/* CWorkspace: need M*M [VT] */ +/* RWorkspace: need 0 */ + + zgemm_("N", "N", m, n, m, &c_b2, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b1, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + + } + + } else if (*n >= mnthr2) { + +/* MNTHR2 <= N < MNTHR1 */ + +/* Path 5t (N >> M, but not as much as MNTHR1) */ +/* Reduce to bidiagonal form without QR decomposition, use */ +/* ZUNGBR and matrix multiplication to compute singular vectors */ + + ie = 1; + nrwork = ie + *m; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize A */ +/* CWorkspace: need 2*M [tauq, taup] + N [work] */ +/* CWorkspace: prefer 2*M [tauq, taup] + (M+N)*NB [work] */ +/* RWorkspace: need M [e] */ + + i__1 = *lwork - nwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, &ierr); + + if (wntqn) { + +/* Path 5tn (N >> M, JOBZ='N') */ +/* Compute singular values only */ +/* CWorkspace: need 0 */ +/* RWorkspace: need M [e] + BDSPAC */ + + dbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + ivt = nwork; + +/* Path 5to (N >> M, JOBZ='O') */ +/* Copy A to U, generate Q */ +/* CWorkspace: need 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] */ +/* RWorkspace: need 0 */ + + zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); + +/* Generate P**H in A */ +/* CWorkspace: need 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] */ +/* RWorkspace: need 0 */ + + i__1 = *lwork - nwork + 1; + zungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ + nwork], &i__1, &ierr); + + ldwkvt = *m; + if (*lwork >= *m * *n + *m * 3) { + +/* WORK( IVT ) is M by N */ + + nwork = ivt + ldwkvt * *n; + chunk = *n; + } else { + +/* WORK( IVT ) is M by CHUNK */ + + chunk = (*lwork - *m * 3) / *m; + nwork = ivt + ldwkvt * chunk; + } + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC */ + + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Multiply Q in U by real matrix RWORK(IRVT) */ +/* storing the result in WORK(IVT), copying to U */ +/* CWorkspace: need 2*M [tauq, taup] + M*M [VT] */ +/* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork] */ + + zlacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &work[ivt], & + ldwkvt, &rwork[nrwork]); + zlacpy_("F", m, m, &work[ivt], &ldwkvt, &u[u_offset], ldu); + +/* Multiply RWORK(IRVT) by P**H in A, storing the */ +/* result in WORK(IVT), copying to A */ +/* CWorkspace: need 2*M [tauq, taup] + M*M [VT] */ +/* CWorkspace: prefer 2*M [tauq, taup] + M*N [VT] */ +/* RWorkspace: need M [e] + M*M [RVT] + 2*M*M [rwork] */ +/* RWorkspace: prefer M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here */ + + nrwork = iru; + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = f2cmin(i__3,chunk); + zlarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1], + lda, &work[ivt], &ldwkvt, &rwork[nrwork]); + zlacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * + a_dim1 + 1], lda); +/* L50: */ + } + } else if (wntqs) { + +/* Path 5ts (N >> M, JOBZ='S') */ +/* Copy A to U, generate Q */ +/* CWorkspace: need 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] */ +/* RWorkspace: need 0 */ + + zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__2, &ierr); + +/* Copy A to VT, generate P**H */ +/* CWorkspace: need 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] */ +/* RWorkspace: need 0 */ + + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zungbr_("P", m, n, m, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC */ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Multiply Q in U by real matrix RWORK(IRU), storing the */ +/* result in A, copying to U */ +/* CWorkspace: need 0 */ +/* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork] */ + + zlacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], + lda, &rwork[nrwork]); + zlacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); + +/* Multiply real matrix RWORK(IRVT) by P**H in VT, */ +/* storing the result in A, copying to VT */ +/* CWorkspace: need 0 */ +/* RWorkspace: need M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here */ + + nrwork = iru; + zlarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + } else { + +/* Path 5ta (N >> M, JOBZ='A') */ +/* Copy A to U, generate Q */ +/* CWorkspace: need 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] */ +/* RWorkspace: need 0 */ + + zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__2, &ierr); + +/* Copy A to VT, generate P**H */ +/* CWorkspace: need 2*M [tauq, taup] + N [work] */ +/* CWorkspace: prefer 2*M [tauq, taup] + N*NB [work] */ +/* RWorkspace: need 0 */ + + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zungbr_("P", n, n, m, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC */ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Multiply Q in U by real matrix RWORK(IRU), storing the */ +/* result in A, copying to U */ +/* CWorkspace: need 0 */ +/* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork] */ + + zlacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], + lda, &rwork[nrwork]); + zlacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); + +/* Multiply real matrix RWORK(IRVT) by P**H in VT, */ +/* storing the result in A, copying to VT */ +/* CWorkspace: need 0 */ +/* RWorkspace: need M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here */ + + nrwork = iru; + zlarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + } + + } else { + +/* N .LT. MNTHR2 */ + +/* Path 6t (N > M, but not much larger) */ +/* Reduce to bidiagonal form without LQ decomposition */ +/* Use ZUNMBR to compute singular vectors */ + + ie = 1; + nrwork = ie + *m; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize A */ +/* CWorkspace: need 2*M [tauq, taup] + N [work] */ +/* CWorkspace: prefer 2*M [tauq, taup] + (M+N)*NB [work] */ +/* RWorkspace: need M [e] */ + + i__2 = *lwork - nwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* Path 6tn (N > M, JOBZ='N') */ +/* Compute singular values only */ +/* CWorkspace: need 0 */ +/* RWorkspace: need M [e] + BDSPAC */ + + dbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { +/* Path 6to (N > M, JOBZ='O') */ + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3) { + +/* WORK( IVT ) is M by N */ + + zlaset_("F", m, n, &c_b1, &c_b1, &work[ivt], &ldwkvt); + nwork = ivt + ldwkvt * *n; + } else { + +/* WORK( IVT ) is M by CHUNK */ + + chunk = (*lwork - *m * 3) / *m; + nwork = ivt + ldwkvt * chunk; + } + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC */ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Copy real matrix RWORK(IRU) to complex matrix U */ +/* Overwrite U by left singular vectors of A */ +/* CWorkspace: need 2*M [tauq, taup] + M*M [VT] + M [work] */ +/* CWorkspace: prefer 2*M [tauq, taup] + M*M [VT] + M*NB [work] */ +/* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] */ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *m * 3) { + +/* Path 6to-fast */ +/* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) */ +/* Overwrite WORK(IVT) by right singular vectors of A, */ +/* copying to A */ +/* CWorkspace: need 2*M [tauq, taup] + M*N [VT] + M [work] */ +/* CWorkspace: prefer 2*M [tauq, taup] + M*N [VT] + M*NB [work] */ +/* RWorkspace: need M [e] + M*M [RVT] */ + + zlacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, + &ierr); + zlacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); + } else { + +/* Path 6to-slow */ +/* Generate P**H in A */ +/* CWorkspace: need 2*M [tauq, taup] + M*M [VT] + M [work] */ +/* CWorkspace: prefer 2*M [tauq, taup] + M*M [VT] + M*NB [work] */ +/* RWorkspace: need 0 */ + + i__2 = *lwork - nwork + 1; + zungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* Multiply Q in A by real matrix RWORK(IRU), storing the */ +/* result in WORK(IU), copying to A */ +/* CWorkspace: need 2*M [tauq, taup] + M*M [VT] */ +/* CWorkspace: prefer 2*M [tauq, taup] + M*N [VT] */ +/* RWorkspace: need M [e] + M*M [RVT] + 2*M*M [rwork] */ +/* RWorkspace: prefer M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here */ + + nrwork = iru; + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = f2cmin(i__3,chunk); + zlarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1] + , lda, &work[ivt], &ldwkvt, &rwork[nrwork]); + zlacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * + a_dim1 + 1], lda); +/* L60: */ + } + } + } else if (wntqs) { + +/* Path 6ts (N > M, JOBZ='S') */ +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC */ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Copy real matrix RWORK(IRU) to complex matrix U */ +/* Overwrite U by left singular vectors of A */ +/* CWorkspace: need 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] */ +/* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] */ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* Copy real matrix RWORK(IRVT) to complex matrix VT */ +/* Overwrite VT by right singular vectors of A */ +/* CWorkspace: need 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] */ +/* RWorkspace: need M [e] + M*M [RVT] */ + + zlaset_("F", m, n, &c_b1, &c_b1, &vt[vt_offset], ldvt); + zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else { + +/* Path 6ta (N > M, JOBZ='A') */ +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in RWORK(IRU) and computing right */ +/* singular vectors of bidiagonal matrix in RWORK(IRVT) */ +/* CWorkspace: need 0 */ +/* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC */ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Copy real matrix RWORK(IRU) to complex matrix U */ +/* Overwrite U by left singular vectors of A */ +/* CWorkspace: need 2*M [tauq, taup] + M [work] */ +/* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] */ +/* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] */ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* Set all of VT to identity matrix */ + + zlaset_("F", n, n, &c_b1, &c_b2, &vt[vt_offset], ldvt); + +/* Copy real matrix RWORK(IRVT) to complex matrix VT */ +/* Overwrite VT by right singular vectors of A */ +/* CWorkspace: need 2*M [tauq, taup] + N [work] */ +/* CWorkspace: prefer 2*M [tauq, taup] + N*NB [work] */ +/* RWorkspace: need M [e] + M*M [RVT] */ + + zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (*info != 0 && anrm > bignum) { + i__1 = minmn - 1; + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &i__1, &c__1, &rwork[ + ie], &minmn, &ierr); + } + if (anrm < smlnum) { + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (*info != 0 && anrm < smlnum) { + i__1 = minmn - 1; + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__1, &c__1, &rwork[ + ie], &minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + + return 0; + +/* End of ZGESDD */ + +} /* zgesdd_ */ + diff --git a/lapack-netlib/SRC/zgesv.c b/lapack-netlib/SRC/zgesv.c new file mode 100644 index 000000000..bc06b738c --- /dev/null +++ b/lapack-netlib/SRC/zgesv.c @@ -0,0 +1,577 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGESV computes the solution to system of linear equations A * X = B for GE matrices (simple dr +iver) */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGESV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */ + +/* INTEGER INFO, LDA, LDB, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGESV computes the solution to a complex system of linear equations */ +/* > A * X = B, */ +/* > where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */ +/* > */ +/* > The LU decomposition with partial pivoting and row interchanges is */ +/* > used to factor A as */ +/* > A = P * L * U, */ +/* > where P is a permutation matrix, L is unit lower triangular, and U is */ +/* > upper triangular. The factored form of A is then used to solve the */ +/* > system of equations A * X = B. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the N-by-N coefficient matrix A. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U; the unit diagonal elements of L are not stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices that define the permutation matrix P; */ +/* > row i of the matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS matrix of right hand side matrix B. */ +/* > On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, so the solution could not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup complex16GEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int zgesv_(integer *n, integer *nrhs, doublecomplex *a, + integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zgetrf_( + integer *, integer *, doublecomplex *, integer *, integer *, + integer *), zgetrs_(char *, integer *, integer *, doublecomplex *, + integer *, integer *, doublecomplex *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGESV ", &i__1, (ftnlen)6); + return 0; + } + +/* Compute the LU factorization of A. */ + + zgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + zgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + b_offset], ldb, info); + } + return 0; + +/* End of ZGESV */ + +} /* zgesv_ */ + diff --git a/lapack-netlib/SRC/zgesvd.c b/lapack-netlib/SRC/zgesvd.c new file mode 100644 index 000000000..f38e7cf6f --- /dev/null +++ b/lapack-netlib/SRC/zgesvd.c @@ -0,0 +1,4606 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGESVD computes the singular value decomposition (SVD) for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGESVD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, */ +/* WORK, LWORK, RWORK, INFO ) */ + +/* CHARACTER JOBU, JOBVT */ +/* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N */ +/* DOUBLE PRECISION RWORK( * ), S( * ) */ +/* COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ), */ +/* $ WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGESVD computes the singular value decomposition (SVD) of a complex */ +/* > M-by-N matrix A, optionally computing the left and/or right singular */ +/* > vectors. The SVD is written */ +/* > */ +/* > A = U * SIGMA * conjugate-transpose(V) */ +/* > */ +/* > where SIGMA is an M-by-N matrix which is zero except for its */ +/* > f2cmin(m,n) diagonal elements, U is an M-by-M unitary matrix, and */ +/* > V is an N-by-N unitary matrix. The diagonal elements of SIGMA */ +/* > are the singular values of A; they are real and non-negative, and */ +/* > are returned in descending order. The first f2cmin(m,n) columns of */ +/* > U and V are the left and right singular vectors of A. */ +/* > */ +/* > Note that the routine returns V**H, not V. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix U: */ +/* > = 'A': all M columns of U are returned in array U: */ +/* > = 'S': the first f2cmin(m,n) columns of U (the left singular */ +/* > vectors) are returned in the array U; */ +/* > = 'O': the first f2cmin(m,n) columns of U (the left singular */ +/* > vectors) are overwritten on the array A; */ +/* > = 'N': no columns of U (no left singular vectors) are */ +/* > computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVT */ +/* > \verbatim */ +/* > JOBVT is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix */ +/* > V**H: */ +/* > = 'A': all N rows of V**H are returned in the array VT; */ +/* > = 'S': the first f2cmin(m,n) rows of V**H (the right singular */ +/* > vectors) are returned in the array VT; */ +/* > = 'O': the first f2cmin(m,n) rows of V**H (the right singular */ +/* > vectors) are overwritten on the array A; */ +/* > = 'N': no rows of V**H (no right singular vectors) are */ +/* > computed. */ +/* > */ +/* > JOBVT and JOBU cannot both be 'O'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if JOBU = 'O', A is overwritten with the first f2cmin(m,n) */ +/* > columns of U (the left singular vectors, */ +/* > stored columnwise); */ +/* > if JOBVT = 'O', A is overwritten with the first f2cmin(m,n) */ +/* > rows of V**H (the right singular vectors, */ +/* > stored rowwise); */ +/* > if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A */ +/* > are destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The singular values of A, sorted so that S(i) >= S(i+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is COMPLEX*16 array, dimension (LDU,UCOL) */ +/* > (LDU,M) if JOBU = 'A' or (LDU,f2cmin(M,N)) if JOBU = 'S'. */ +/* > If JOBU = 'A', U contains the M-by-M unitary matrix U; */ +/* > if JOBU = 'S', U contains the first f2cmin(m,n) columns of U */ +/* > (the left singular vectors, stored columnwise); */ +/* > if JOBU = 'N' or 'O', U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= 1; if */ +/* > JOBU = 'S' or 'A', LDU >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VT */ +/* > \verbatim */ +/* > VT is COMPLEX*16 array, dimension (LDVT,N) */ +/* > If JOBVT = 'A', VT contains the N-by-N unitary matrix */ +/* > V**H; */ +/* > if JOBVT = 'S', VT contains the first f2cmin(m,n) rows of */ +/* > V**H (the right singular vectors, stored rowwise); */ +/* > if JOBVT = 'N' or 'O', VT is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVT */ +/* > \verbatim */ +/* > LDVT is INTEGER */ +/* > The leading dimension of the array VT. LDVT >= 1; if */ +/* > JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= f2cmin(M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= MAX(1,2*MIN(M,N)+MAX(M,N)). */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (5*f2cmin(M,N)) */ +/* > On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the */ +/* > unconverged superdiagonal elements of an upper bidiagonal */ +/* > matrix B whose diagonal is in S (not necessarily sorted). */ +/* > B satisfies A = U * B * VT, so it has the same singular */ +/* > values as A, and singular vectors related by U and VT. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if ZBDSQR did not converge, INFO specifies how many */ +/* > superdiagonals of an intermediate bidiagonal form B */ +/* > did not converge to zero. See the description of RWORK */ +/* > above for details. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup complex16GEsing */ + +/* ===================================================================== */ +/* Subroutine */ int zgesvd_(char *jobu, char *jobvt, integer *m, integer *n, + doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u, + integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], + i__2, i__3, i__4; + char ch__1[2]; + + /* Local variables */ + doublecomplex cdum[1]; + integer iscl; + doublereal anrm; + integer ierr, itau, ncvt, nrvt, lwork_zgebrd__, lwork_zgelqf__, i__, + lwork_zgeqrf__; + extern logical lsame_(char *, char *); + integer chunk, minmn; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + integer wrkbl, itaup, itauq, mnthr, iwork; + logical wntua, wntva, wntun, wntuo, wntvn, wntvo, wntus, wntvs; + integer ie; + extern doublereal dlamch_(char *); + integer ir, iu; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), xerbla_(char *, integer *, ftnlen), + zgebrd_(integer *, integer *, doublecomplex *, integer *, + doublereal *, doublereal *, doublecomplex *, doublecomplex *, + doublecomplex *, integer *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + doublereal bignum; + extern /* Subroutine */ int zgelqf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ), zlascl_(char *, integer *, integer *, doublereal *, doublereal + *, integer *, integer *, doublecomplex *, integer *, integer *), zgeqrf_(integer *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, integer *), zlacpy_( + char *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zlaset_(char *, integer *, + integer *, doublecomplex *, doublecomplex *, doublecomplex *, + integer *); + integer ldwrkr; + extern /* Subroutine */ int zbdsqr_(char *, integer *, integer *, integer + *, integer *, doublereal *, doublereal *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, integer *); + integer minwrk, ldwrku, maxwrk; + extern /* Subroutine */ int zungbr_(char *, integer *, integer *, integer + *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + doublereal smlnum; + integer irwork; + extern /* Subroutine */ int zunmbr_(char *, char *, char *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ), zunglq_(integer *, integer *, integer * + , doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + logical lquery, wntuas, wntvas; + extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + integer blk, lwork_zungbr_p__, lwork_zungbr_q__, ncu; + doublereal dum[1]; + integer lwork_zunglq_m__, lwork_zunglq_n__; + doublereal eps; + integer lwork_zungqr_m__, lwork_zungqr_n__, nru; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1 * 1; + vt -= vt_offset; + --work; + --rwork; + + /* Function Body */ + *info = 0; + minmn = f2cmin(*m,*n); + wntua = lsame_(jobu, "A"); + wntus = lsame_(jobu, "S"); + wntuas = wntua || wntus; + wntuo = lsame_(jobu, "O"); + wntun = lsame_(jobu, "N"); + wntva = lsame_(jobvt, "A"); + wntvs = lsame_(jobvt, "S"); + wntvas = wntva || wntvs; + wntvo = lsame_(jobvt, "O"); + wntvn = lsame_(jobvt, "N"); + lquery = *lwork == -1; + + if (! (wntua || wntus || wntuo || wntun)) { + *info = -1; + } else if (! (wntva || wntvs || wntvo || wntvn) || wntvo && wntuo) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*m)) { + *info = -6; + } else if (*ldu < 1 || wntuas && *ldu < *m) { + *info = -9; + } else if (*ldvt < 1 || wntva && *ldvt < *n || wntvs && *ldvt < minmn) { + *info = -11; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* CWorkspace refers to complex workspace, and RWorkspace to */ +/* real workspace. NB refers to the optimal block size for the */ +/* immediately following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (*m >= *n && minmn > 0) { + +/* Space needed for ZBDSQR is BDSPAC = 5*N */ + +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = jobu; + i__1[1] = 1, a__1[1] = jobvt; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + mnthr = ilaenv_(&c__6, "ZGESVD", ch__1, m, n, &c__0, &c__0, ( + ftnlen)6, (ftnlen)2); +/* Compute space needed for ZGEQRF */ + zgeqrf_(m, n, &a[a_offset], lda, cdum, cdum, &c_n1, &ierr); + lwork_zgeqrf__ = (integer) cdum[0].r; +/* Compute space needed for ZUNGQR */ + zungqr_(m, n, n, &a[a_offset], lda, cdum, cdum, &c_n1, &ierr); + lwork_zungqr_n__ = (integer) cdum[0].r; + zungqr_(m, m, n, &a[a_offset], lda, cdum, cdum, &c_n1, &ierr); + lwork_zungqr_m__ = (integer) cdum[0].r; +/* Compute space needed for ZGEBRD */ + zgebrd_(n, n, &a[a_offset], lda, &s[1], dum, cdum, cdum, cdum, & + c_n1, &ierr); + lwork_zgebrd__ = (integer) cdum[0].r; +/* Compute space needed for ZUNGBR */ + zungbr_("P", n, n, n, &a[a_offset], lda, cdum, cdum, &c_n1, &ierr); + lwork_zungbr_p__ = (integer) cdum[0].r; + zungbr_("Q", n, n, n, &a[a_offset], lda, cdum, cdum, &c_n1, &ierr); + lwork_zungbr_q__ = (integer) cdum[0].r; + + if (*m >= mnthr) { + if (wntun) { + +/* Path 1 (M much larger than N, JOBU='N') */ + + maxwrk = *n + lwork_zgeqrf__; +/* Computing MAX */ + i__2 = maxwrk, i__3 = (*n << 1) + lwork_zgebrd__; + maxwrk = f2cmax(i__2,i__3); + if (wntvo || wntvas) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = (*n << 1) + lwork_zungbr_p__; + maxwrk = f2cmax(i__2,i__3); + } + minwrk = *n * 3; + } else if (wntuo && wntvn) { + +/* Path 2 (M much larger than N, JOBU='O', JOBVT='N') */ + + wrkbl = *n + lwork_zgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_zungqr_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zungbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n; + maxwrk = f2cmax(i__2,i__3); + minwrk = (*n << 1) + *m; + } else if (wntuo && wntvas) { + +/* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or */ +/* 'A') */ + + wrkbl = *n + lwork_zgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_zungqr_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zungbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zungbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n; + maxwrk = f2cmax(i__2,i__3); + minwrk = (*n << 1) + *m; + } else if (wntus && wntvn) { + +/* Path 4 (M much larger than N, JOBU='S', JOBVT='N') */ + + wrkbl = *n + lwork_zgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_zungqr_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zungbr_q__; + wrkbl = f2cmax(i__2,i__3); + maxwrk = *n * *n + wrkbl; + minwrk = (*n << 1) + *m; + } else if (wntus && wntvo) { + +/* Path 5 (M much larger than N, JOBU='S', JOBVT='O') */ + + wrkbl = *n + lwork_zgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_zungqr_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zungbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zungbr_p__; + wrkbl = f2cmax(i__2,i__3); + maxwrk = (*n << 1) * *n + wrkbl; + minwrk = (*n << 1) + *m; + } else if (wntus && wntvas) { + +/* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or */ +/* 'A') */ + + wrkbl = *n + lwork_zgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_zungqr_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zungbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zungbr_p__; + wrkbl = f2cmax(i__2,i__3); + maxwrk = *n * *n + wrkbl; + minwrk = (*n << 1) + *m; + } else if (wntua && wntvn) { + +/* Path 7 (M much larger than N, JOBU='A', JOBVT='N') */ + + wrkbl = *n + lwork_zgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_zungqr_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zungbr_q__; + wrkbl = f2cmax(i__2,i__3); + maxwrk = *n * *n + wrkbl; + minwrk = (*n << 1) + *m; + } else if (wntua && wntvo) { + +/* Path 8 (M much larger than N, JOBU='A', JOBVT='O') */ + + wrkbl = *n + lwork_zgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_zungqr_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zungbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zungbr_p__; + wrkbl = f2cmax(i__2,i__3); + maxwrk = (*n << 1) * *n + wrkbl; + minwrk = (*n << 1) + *m; + } else if (wntua && wntvas) { + +/* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or */ +/* 'A') */ + + wrkbl = *n + lwork_zgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_zungqr_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zungbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*n << 1) + lwork_zungbr_p__; + wrkbl = f2cmax(i__2,i__3); + maxwrk = *n * *n + wrkbl; + minwrk = (*n << 1) + *m; + } + } else { + +/* Path 10 (M at least N, but not much larger) */ + + zgebrd_(m, n, &a[a_offset], lda, &s[1], dum, cdum, cdum, cdum, + &c_n1, &ierr); + lwork_zgebrd__ = (integer) cdum[0].r; + maxwrk = (*n << 1) + lwork_zgebrd__; + if (wntus || wntuo) { + zungbr_("Q", m, n, n, &a[a_offset], lda, cdum, cdum, & + c_n1, &ierr); + lwork_zungbr_q__ = (integer) cdum[0].r; +/* Computing MAX */ + i__2 = maxwrk, i__3 = (*n << 1) + lwork_zungbr_q__; + maxwrk = f2cmax(i__2,i__3); + } + if (wntua) { + zungbr_("Q", m, m, n, &a[a_offset], lda, cdum, cdum, & + c_n1, &ierr); + lwork_zungbr_q__ = (integer) cdum[0].r; +/* Computing MAX */ + i__2 = maxwrk, i__3 = (*n << 1) + lwork_zungbr_q__; + maxwrk = f2cmax(i__2,i__3); + } + if (! wntvn) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = (*n << 1) + lwork_zungbr_p__; + maxwrk = f2cmax(i__2,i__3); + } + minwrk = (*n << 1) + *m; + } + } else if (minmn > 0) { + +/* Space needed for ZBDSQR is BDSPAC = 5*M */ + +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = jobu; + i__1[1] = 1, a__1[1] = jobvt; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + mnthr = ilaenv_(&c__6, "ZGESVD", ch__1, m, n, &c__0, &c__0, ( + ftnlen)6, (ftnlen)2); +/* Compute space needed for ZGELQF */ + zgelqf_(m, n, &a[a_offset], lda, cdum, cdum, &c_n1, &ierr); + lwork_zgelqf__ = (integer) cdum[0].r; +/* Compute space needed for ZUNGLQ */ + zunglq_(n, n, m, cdum, n, cdum, cdum, &c_n1, &ierr); + lwork_zunglq_n__ = (integer) cdum[0].r; + zunglq_(m, n, m, &a[a_offset], lda, cdum, cdum, &c_n1, &ierr); + lwork_zunglq_m__ = (integer) cdum[0].r; +/* Compute space needed for ZGEBRD */ + zgebrd_(m, m, &a[a_offset], lda, &s[1], dum, cdum, cdum, cdum, & + c_n1, &ierr); + lwork_zgebrd__ = (integer) cdum[0].r; +/* Compute space needed for ZUNGBR P */ + zungbr_("P", m, m, m, &a[a_offset], n, cdum, cdum, &c_n1, &ierr); + lwork_zungbr_p__ = (integer) cdum[0].r; +/* Compute space needed for ZUNGBR Q */ + zungbr_("Q", m, m, m, &a[a_offset], n, cdum, cdum, &c_n1, &ierr); + lwork_zungbr_q__ = (integer) cdum[0].r; + if (*n >= mnthr) { + if (wntvn) { + +/* Path 1t(N much larger than M, JOBVT='N') */ + + maxwrk = *m + lwork_zgelqf__; +/* Computing MAX */ + i__2 = maxwrk, i__3 = (*m << 1) + lwork_zgebrd__; + maxwrk = f2cmax(i__2,i__3); + if (wntuo || wntuas) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = (*m << 1) + lwork_zungbr_q__; + maxwrk = f2cmax(i__2,i__3); + } + minwrk = *m * 3; + } else if (wntvo && wntun) { + +/* Path 2t(N much larger than M, JOBU='N', JOBVT='O') */ + + wrkbl = *m + lwork_zgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_zunglq_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zungbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n; + maxwrk = f2cmax(i__2,i__3); + minwrk = (*m << 1) + *n; + } else if (wntvo && wntuas) { + +/* Path 3t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='O') */ + + wrkbl = *m + lwork_zgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_zunglq_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zungbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zungbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n; + maxwrk = f2cmax(i__2,i__3); + minwrk = (*m << 1) + *n; + } else if (wntvs && wntun) { + +/* Path 4t(N much larger than M, JOBU='N', JOBVT='S') */ + + wrkbl = *m + lwork_zgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_zunglq_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zungbr_p__; + wrkbl = f2cmax(i__2,i__3); + maxwrk = *m * *m + wrkbl; + minwrk = (*m << 1) + *n; + } else if (wntvs && wntuo) { + +/* Path 5t(N much larger than M, JOBU='O', JOBVT='S') */ + + wrkbl = *m + lwork_zgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_zunglq_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zungbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zungbr_q__; + wrkbl = f2cmax(i__2,i__3); + maxwrk = (*m << 1) * *m + wrkbl; + minwrk = (*m << 1) + *n; + } else if (wntvs && wntuas) { + +/* Path 6t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='S') */ + + wrkbl = *m + lwork_zgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_zunglq_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zungbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zungbr_q__; + wrkbl = f2cmax(i__2,i__3); + maxwrk = *m * *m + wrkbl; + minwrk = (*m << 1) + *n; + } else if (wntva && wntun) { + +/* Path 7t(N much larger than M, JOBU='N', JOBVT='A') */ + + wrkbl = *m + lwork_zgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_zunglq_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zungbr_p__; + wrkbl = f2cmax(i__2,i__3); + maxwrk = *m * *m + wrkbl; + minwrk = (*m << 1) + *n; + } else if (wntva && wntuo) { + +/* Path 8t(N much larger than M, JOBU='O', JOBVT='A') */ + + wrkbl = *m + lwork_zgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_zunglq_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zungbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zungbr_q__; + wrkbl = f2cmax(i__2,i__3); + maxwrk = (*m << 1) * *m + wrkbl; + minwrk = (*m << 1) + *n; + } else if (wntva && wntuas) { + +/* Path 9t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='A') */ + + wrkbl = *m + lwork_zgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_zunglq_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zungbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = (*m << 1) + lwork_zungbr_q__; + wrkbl = f2cmax(i__2,i__3); + maxwrk = *m * *m + wrkbl; + minwrk = (*m << 1) + *n; + } + } else { + +/* Path 10t(N greater than M, but not much larger) */ + + zgebrd_(m, n, &a[a_offset], lda, &s[1], dum, cdum, cdum, cdum, + &c_n1, &ierr); + lwork_zgebrd__ = (integer) cdum[0].r; + maxwrk = (*m << 1) + lwork_zgebrd__; + if (wntvs || wntvo) { +/* Compute space needed for ZUNGBR P */ + zungbr_("P", m, n, m, &a[a_offset], n, cdum, cdum, &c_n1, + &ierr); + lwork_zungbr_p__ = (integer) cdum[0].r; +/* Computing MAX */ + i__2 = maxwrk, i__3 = (*m << 1) + lwork_zungbr_p__; + maxwrk = f2cmax(i__2,i__3); + } + if (wntva) { + zungbr_("P", n, n, m, &a[a_offset], n, cdum, cdum, &c_n1, + &ierr); + lwork_zungbr_p__ = (integer) cdum[0].r; +/* Computing MAX */ + i__2 = maxwrk, i__3 = (*m << 1) + lwork_zungbr_p__; + maxwrk = f2cmax(i__2,i__3); + } + if (! wntun) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = (*m << 1) + lwork_zungbr_q__; + maxwrk = f2cmax(i__2,i__3); + } + minwrk = (*m << 1) + *n; + } + } + maxwrk = f2cmax(maxwrk,minwrk); + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("ZGESVD", &i__2, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = dlamch_("P"); + smlnum = sqrt(dlamch_("S")) / eps; + bignum = 1. / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = zlange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0. && anrm < smlnum) { + iscl = 1; + zlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + zlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* A has at least as many rows as columns. If A has sufficiently */ +/* more rows than columns, first reduce using the QR */ +/* decomposition (if sufficient workspace available) */ + + if (*m >= mnthr) { + + if (wntun) { + +/* Path 1 (M much larger than N, JOBU='N') */ +/* No left singular vectors to be computed */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (CWorkspace: need 2*N, prefer N+N*NB) */ +/* (RWorkspace: need 0) */ + + i__2 = *lwork - iwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], & + i__2, &ierr); + +/* Zero out below R */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + zlaset_("L", &i__2, &i__3, &c_b1, &c_b1, &a[a_dim1 + 2], + lda); + } + ie = 1; + itauq = 1; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) */ +/* (RWorkspace: need N) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + ncvt = 0; + if (wntvo || wntvas) { + +/* If right singular vectors desired, generate P'. */ +/* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], & + work[iwork], &i__2, &ierr); + ncvt = *n; + } + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in A if desired */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, &ncvt, &c__0, &c__0, &s[1], &rwork[ie], &a[ + a_offset], lda, cdum, &c__1, cdum, &c__1, &rwork[ + irwork], info); + +/* If right singular vectors desired in VT, copy them there */ + + if (wntvas) { + zlacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + } + + } else if (wntuo && wntvn) { + +/* Path 2 (M much larger than N, JOBU='O', JOBVT='N') */ +/* N left singular vectors to be overwritten on A and */ +/* no right singular vectors to be computed */ + + if (*lwork >= *n * *n + *n * 3) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n; + if (*lwork >= f2cmax(i__2,i__3) + *lda * *n) { + +/* WORK(IU) is LDA by N, WORK(IR) is LDA by N */ + + ldwrku = *lda; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n; + if (*lwork >= f2cmax(i__2,i__3) + *n * *n) { + +/* WORK(IU) is LDA by N, WORK(IR) is N by N */ + + ldwrku = *lda; + ldwrkr = *n; + } else { + +/* WORK(IU) is LDWRKU by N, WORK(IR) is N by N */ + + ldwrku = (*lwork - *n * *n) / *n; + ldwrkr = *n; + } + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__2, &ierr); + +/* Copy R to WORK(IR) and zero out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__3 = *n - 1; + zlaset_("L", &i__2, &i__3, &c_b1, &c_b1, &work[ir + 1], & + ldwrkr); + +/* Generate Q in A */ +/* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) */ +/* (RWorkspace: need N) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], &i__2, & + ierr); + +/* Generate left vectors bidiagonalizing R */ +/* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) */ +/* (RWorkspace: need 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], & + work[iwork], &i__2, &ierr); + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) */ +/* (CWorkspace: need N*N) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, &c__0, n, &c__0, &s[1], &rwork[ie], cdum, + &c__1, &work[ir], &ldwrkr, cdum, &c__1, &rwork[ + irwork], info); + iu = itauq; + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in WORK(IU) and copying to A */ +/* (CWorkspace: need N*N+N, prefer N*N+M*N) */ +/* (RWorkspace: 0) */ + + i__2 = *m; + i__3 = ldwrku; + for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__3) { +/* Computing MIN */ + i__4 = *m - i__ + 1; + chunk = f2cmin(i__4,ldwrku); + zgemm_("N", "N", &chunk, n, n, &c_b2, &a[i__ + a_dim1] + , lda, &work[ir], &ldwrkr, &c_b1, &work[iu], & + ldwrku); + zlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + ie = 1; + itauq = 1; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize A */ +/* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) */ +/* (RWorkspace: N) */ + + i__3 = *lwork - iwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__3, &ierr); + +/* Generate left vectors bidiagonalizing A */ +/* (CWorkspace: need 3*N, prefer 2*N+N*NB) */ +/* (RWorkspace: 0) */ + + i__3 = *lwork - iwork + 1; + zungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[iwork], &i__3, &ierr); + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in A */ +/* (CWorkspace: need 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, &c__0, m, &c__0, &s[1], &rwork[ie], cdum, + &c__1, &a[a_offset], lda, cdum, &c__1, &rwork[ + irwork], info); + + } + + } else if (wntuo && wntvas) { + +/* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') */ +/* N left singular vectors to be overwritten on A and */ +/* N right singular vectors to be computed in VT */ + + if (*lwork >= *n * *n + *n * 3) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n; + if (*lwork >= f2cmax(i__3,i__2) + *lda * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ + + ldwrku = *lda; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n; + if (*lwork >= f2cmax(i__3,i__2) + *n * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is N by N */ + + ldwrku = *lda; + ldwrkr = *n; + } else { + +/* WORK(IU) is LDWRKU by N and WORK(IR) is N by N */ + + ldwrku = (*lwork - *n * *n) / *n; + ldwrkr = *n; + } + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */ +/* (RWorkspace: 0) */ + + i__3 = *lwork - iwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__3, &ierr); + +/* Copy R to VT, zeroing out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + if (*n > 1) { + i__3 = *n - 1; + i__2 = *n - 1; + zlaset_("L", &i__3, &i__2, &c_b1, &c_b1, &vt[vt_dim1 + + 2], ldvt); + } + +/* Generate Q in A */ +/* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */ +/* (RWorkspace: 0) */ + + i__3 = *lwork - iwork + 1; + zungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__3, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT, copying result to WORK(IR) */ +/* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) */ +/* (RWorkspace: need N) */ + + i__3 = *lwork - iwork + 1; + zgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], &i__3, & + ierr); + zlacpy_("L", n, n, &vt[vt_offset], ldvt, &work[ir], & + ldwrkr); + +/* Generate left vectors bidiagonalizing R in WORK(IR) */ +/* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) */ +/* (RWorkspace: 0) */ + + i__3 = *lwork - iwork + 1; + zungbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], & + work[iwork], &i__3, &ierr); + +/* Generate right vectors bidiagonalizing R in VT */ +/* (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB) */ +/* (RWorkspace: 0) */ + + i__3 = *lwork - iwork + 1; + zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], + &work[iwork], &i__3, &ierr); + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) and computing right */ +/* singular vectors of R in VT */ +/* (CWorkspace: need N*N) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, n, n, &c__0, &s[1], &rwork[ie], &vt[ + vt_offset], ldvt, &work[ir], &ldwrkr, cdum, &c__1, + &rwork[irwork], info); + iu = itauq; + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in WORK(IU) and copying to A */ +/* (CWorkspace: need N*N+N, prefer N*N+M*N) */ +/* (RWorkspace: 0) */ + + i__3 = *m; + i__2 = ldwrku; + for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ += + i__2) { +/* Computing MIN */ + i__4 = *m - i__ + 1; + chunk = f2cmin(i__4,ldwrku); + zgemm_("N", "N", &chunk, n, n, &c_b2, &a[i__ + a_dim1] + , lda, &work[ir], &ldwrkr, &c_b1, &work[iu], & + ldwrku); + zlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L20: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (CWorkspace: need 2*N, prefer N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__2, &ierr); + +/* Copy R to VT, zeroing out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + zlaset_("L", &i__2, &i__3, &c_b1, &c_b1, &vt[vt_dim1 + + 2], ldvt); + } + +/* Generate Q in A */ +/* (CWorkspace: need 2*N, prefer N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT */ +/* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) */ +/* (RWorkspace: N) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], &i__2, & + ierr); + +/* Multiply Q in A by left vectors bidiagonalizing R */ +/* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunmbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, & + work[itauq], &a[a_offset], lda, &work[iwork], & + i__2, &ierr); + +/* Generate right vectors bidiagonalizing R in VT */ +/* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], + &work[iwork], &i__2, &ierr); + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in A and computing right */ +/* singular vectors of A in VT */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, n, m, &c__0, &s[1], &rwork[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, cdum, &c__1, + &rwork[irwork], info); + + } + + } else if (wntus) { + + if (wntvn) { + +/* Path 4 (M much larger than N, JOBU='S', JOBVT='N') */ +/* N left singular vectors to be computed in U and */ +/* no right singular vectors to be computed */ + + if (*lwork >= *n * *n + *n * 3) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IR) is LDA by N */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &work[ir], & + ldwrkr); + i__2 = *n - 1; + i__3 = *n - 1; + zlaset_("L", &i__2, &i__3, &c_b1, &c_b1, &work[ir + 1] + , &ldwrkr); + +/* Generate Q in A */ +/* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungqr_(m, n, n, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) */ +/* (RWorkspace: need N) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate left vectors bidiagonalizing R in WORK(IR) */ +/* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq] + , &work[iwork], &i__2, &ierr); + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) */ +/* (CWorkspace: need N*N) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, &c__0, n, &c__0, &s[1], &rwork[ie], + cdum, &c__1, &work[ir], &ldwrkr, cdum, &c__1, + &rwork[irwork], info); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in U */ +/* (CWorkspace: need N*N) */ +/* (RWorkspace: 0) */ + + zgemm_("N", "N", m, n, n, &c_b2, &a[a_offset], lda, & + work[ir], &ldwrkr, &c_b1, &u[u_offset], ldu); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (CWorkspace: need 2*N, prefer N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (CWorkspace: need 2*N, prefer N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungqr_(m, n, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + zlaset_("L", &i__2, &i__3, &c_b1, &c_b1, &a[ + a_dim1 + 2], lda); + } + +/* Bidiagonalize R in A */ +/* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) */ +/* (RWorkspace: need N) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left vectors bidiagonalizing R */ +/* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunmbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr) + ; + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, &c__0, m, &c__0, &s[1], &rwork[ie], + cdum, &c__1, &u[u_offset], ldu, cdum, &c__1, & + rwork[irwork], info); + + } + + } else if (wntvo) { + +/* Path 5 (M much larger than N, JOBU='S', JOBVT='O') */ +/* N left singular vectors to be computed in U and */ +/* N right singular vectors to be overwritten on A */ + + if (*lwork >= (*n << 1) * *n + *n * 3) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *n) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is N by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } else { + +/* WORK(IU) is N by N and WORK(IR) is N by N */ + + ldwrku = *n; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *n - 1; + i__3 = *n - 1; + zlaset_("L", &i__2, &i__3, &c_b1, &c_b1, &work[iu + 1] + , &ldwrku); + +/* Generate Q in A */ +/* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungqr_(m, n, n, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (CWorkspace: need 2*N*N+3*N, */ +/* prefer 2*N*N+2*N+2*N*NB) */ +/* (RWorkspace: need N) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(n, n, &work[iu], &ldwrku, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + zlacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], & + ldwrkr); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] + , &work[iwork], &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in WORK(IR) */ +/* (CWorkspace: need 2*N*N+3*N-1, */ +/* prefer 2*N*N+2*N+(N-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup] + , &work[iwork], &i__2, &ierr); + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in WORK(IR) */ +/* (CWorkspace: need 2*N*N) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, n, n, &c__0, &s[1], &rwork[ie], &work[ + ir], &ldwrkr, &work[iu], &ldwrku, cdum, &c__1, + &rwork[irwork], info); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IU), storing result in U */ +/* (CWorkspace: need N*N) */ +/* (RWorkspace: 0) */ + + zgemm_("N", "N", m, n, n, &c_b2, &a[a_offset], lda, & + work[iu], &ldwrku, &c_b1, &u[u_offset], ldu); + +/* Copy right singular vectors of R to A */ +/* (CWorkspace: need N*N) */ +/* (RWorkspace: 0) */ + + zlacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], + lda); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (CWorkspace: need 2*N, prefer N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (CWorkspace: need 2*N, prefer N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungqr_(m, n, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + zlaset_("L", &i__2, &i__3, &c_b1, &c_b1, &a[ + a_dim1 + 2], lda); + } + +/* Bidiagonalize R in A */ +/* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) */ +/* (RWorkspace: need N) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left vectors bidiagonalizing R */ +/* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunmbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr) + ; + +/* Generate right vectors bidiagonalizing R in A */ +/* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], + &work[iwork], &i__2, &ierr); + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in A */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, n, m, &c__0, &s[1], &rwork[ie], &a[ + a_offset], lda, &u[u_offset], ldu, cdum, & + c__1, &rwork[irwork], info); + + } + + } else if (wntvas) { + +/* Path 6 (M much larger than N, JOBU='S', JOBVT='S' */ +/* or 'A') */ +/* N left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + + if (*lwork >= *n * *n + *n * 3) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IU) is LDA by N */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is N by N */ + + ldwrku = *n; + } + itau = iu + ldwrku * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *n - 1; + i__3 = *n - 1; + zlaset_("L", &i__2, &i__3, &c_b1, &c_b1, &work[iu + 1] + , &ldwrku); + +/* Generate Q in A */ +/* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungqr_(m, n, n, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to VT */ +/* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) */ +/* (RWorkspace: need N) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(n, n, &work[iu], &ldwrku, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + zlacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset], + ldvt); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] + , &work[iwork], &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in VT */ +/* (CWorkspace: need N*N+3*N-1, */ +/* prefer N*N+2*N+(N-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr) + ; + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in VT */ +/* (CWorkspace: need N*N) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, n, n, &c__0, &s[1], &rwork[ie], &vt[ + vt_offset], ldvt, &work[iu], &ldwrku, cdum, & + c__1, &rwork[irwork], info); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IU), storing result in U */ +/* (CWorkspace: need N*N) */ +/* (RWorkspace: 0) */ + + zgemm_("N", "N", m, n, n, &c_b2, &a[a_offset], lda, & + work[iu], &ldwrku, &c_b1, &u[u_offset], ldu); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (CWorkspace: need 2*N, prefer N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (CWorkspace: need 2*N, prefer N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungqr_(m, n, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R to VT, zeroing out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + zlaset_("L", &i__2, &i__3, &c_b1, &c_b1, &vt[ + vt_dim1 + 2], ldvt); + } + ie = 1; + itauq = itau; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT */ +/* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) */ +/* (RWorkspace: need N) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &rwork[ie], + &work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in VT */ +/* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunmbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, + &work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in VT */ +/* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr) + ; + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, n, m, &c__0, &s[1], &rwork[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, cdum, & + c__1, &rwork[irwork], info); + + } + + } + + } else if (wntua) { + + if (wntvn) { + +/* Path 7 (M much larger than N, JOBU='A', JOBVT='N') */ +/* M left singular vectors to be computed in U and */ +/* no right singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *n * 3; + if (*lwork >= *n * *n + f2cmax(i__2,i__3)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IR) is LDA by N */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Copy R to WORK(IR), zeroing out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &work[ir], & + ldwrkr); + i__2 = *n - 1; + i__3 = *n - 1; + zlaset_("L", &i__2, &i__3, &c_b1, &c_b1, &work[ir + 1] + , &ldwrkr); + +/* Generate Q in U */ +/* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) */ +/* (RWorkspace: need N) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate left bidiagonalizing vectors in WORK(IR) */ +/* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq] + , &work[iwork], &i__2, &ierr); + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) */ +/* (CWorkspace: need N*N) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, &c__0, n, &c__0, &s[1], &rwork[ie], + cdum, &c__1, &work[ir], &ldwrkr, cdum, &c__1, + &rwork[irwork], info); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IR), storing result in A */ +/* (CWorkspace: need N*N) */ +/* (RWorkspace: 0) */ + + zgemm_("N", "N", m, n, n, &c_b2, &u[u_offset], ldu, & + work[ir], &ldwrkr, &c_b1, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (CWorkspace: need 2*N, prefer N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (CWorkspace: need N+M, prefer N+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + zlaset_("L", &i__2, &i__3, &c_b1, &c_b1, &a[ + a_dim1 + 2], lda); + } + +/* Bidiagonalize R in A */ +/* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) */ +/* (RWorkspace: need N) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in A */ +/* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunmbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr) + ; + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, &c__0, m, &c__0, &s[1], &rwork[ie], + cdum, &c__1, &u[u_offset], ldu, cdum, &c__1, & + rwork[irwork], info); + + } + + } else if (wntvo) { + +/* Path 8 (M much larger than N, JOBU='A', JOBVT='O') */ +/* M left singular vectors to be computed in U and */ +/* N right singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *n * 3; + if (*lwork >= (*n << 1) * *n + f2cmax(i__2,i__3)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *n) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is N by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } else { + +/* WORK(IU) is N by N and WORK(IR) is N by N */ + + ldwrku = *n; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *n - 1; + i__3 = *n - 1; + zlaset_("L", &i__2, &i__3, &c_b1, &c_b1, &work[iu + 1] + , &ldwrku); + ie = 1; + itauq = itau; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (CWorkspace: need 2*N*N+3*N, */ +/* prefer 2*N*N+2*N+2*N*NB) */ +/* (RWorkspace: need N) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(n, n, &work[iu], &ldwrku, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + zlacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], & + ldwrkr); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] + , &work[iwork], &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in WORK(IR) */ +/* (CWorkspace: need 2*N*N+3*N-1, */ +/* prefer 2*N*N+2*N+(N-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup] + , &work[iwork], &i__2, &ierr); + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in WORK(IR) */ +/* (CWorkspace: need 2*N*N) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, n, n, &c__0, &s[1], &rwork[ie], &work[ + ir], &ldwrkr, &work[iu], &ldwrku, cdum, &c__1, + &rwork[irwork], info); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IU), storing result in A */ +/* (CWorkspace: need N*N) */ +/* (RWorkspace: 0) */ + + zgemm_("N", "N", m, n, n, &c_b2, &u[u_offset], ldu, & + work[iu], &ldwrku, &c_b1, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Copy right singular vectors of R from WORK(IR) to A */ + + zlacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], + lda); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (CWorkspace: need 2*N, prefer N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (CWorkspace: need N+M, prefer N+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + zlaset_("L", &i__2, &i__3, &c_b1, &c_b1, &a[ + a_dim1 + 2], lda); + } + +/* Bidiagonalize R in A */ +/* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) */ +/* (RWorkspace: need N) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in A */ +/* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunmbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr) + ; + +/* Generate right bidiagonalizing vectors in A */ +/* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], + &work[iwork], &i__2, &ierr); + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in A */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, n, m, &c__0, &s[1], &rwork[ie], &a[ + a_offset], lda, &u[u_offset], ldu, cdum, & + c__1, &rwork[irwork], info); + + } + + } else if (wntvas) { + +/* Path 9 (M much larger than N, JOBU='A', JOBVT='S' */ +/* or 'A') */ +/* M left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *n * 3; + if (*lwork >= *n * *n + f2cmax(i__2,i__3)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IU) is LDA by N */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is N by N */ + + ldwrku = *n; + } + itau = iu + ldwrku * *n; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *n - 1; + i__3 = *n - 1; + zlaset_("L", &i__2, &i__3, &c_b1, &c_b1, &work[iu + 1] + , &ldwrku); + ie = 1; + itauq = itau; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to VT */ +/* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) */ +/* (RWorkspace: need N) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(n, n, &work[iu], &ldwrku, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + zlacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset], + ldvt); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] + , &work[iwork], &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in VT */ +/* (CWorkspace: need N*N+3*N-1, */ +/* prefer N*N+2*N+(N-1)*NB) */ +/* (RWorkspace: need 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr) + ; + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in VT */ +/* (CWorkspace: need N*N) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, n, n, &c__0, &s[1], &rwork[ie], &vt[ + vt_offset], ldvt, &work[iu], &ldwrku, cdum, & + c__1, &rwork[irwork], info); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IU), storing result in A */ +/* (CWorkspace: need N*N) */ +/* (RWorkspace: 0) */ + + zgemm_("N", "N", m, n, n, &c_b2, &u[u_offset], ldu, & + work[iu], &ldwrku, &c_b1, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (CWorkspace: need 2*N, prefer N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (CWorkspace: need N+M, prefer N+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R from A to VT, zeroing out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + zlaset_("L", &i__2, &i__3, &c_b1, &c_b1, &vt[ + vt_dim1 + 2], ldvt); + } + ie = 1; + itauq = itau; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT */ +/* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) */ +/* (RWorkspace: need N) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &rwork[ie], + &work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in VT */ +/* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunmbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, + &work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in VT */ +/* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr) + ; + irwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, n, m, &c__0, &s[1], &rwork[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, cdum, & + c__1, &rwork[irwork], info); + + } + + } + + } + + } else { + +/* M .LT. MNTHR */ + +/* Path 10 (M at least N, but not much larger) */ +/* Reduce to bidiagonal form without QR decomposition */ + + ie = 1; + itauq = 1; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize A */ +/* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) */ +/* (RWorkspace: need N) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[iwork], &i__2, &ierr); + if (wntuas) { + +/* If left singular vectors desired in U, copy result to U */ +/* and generate left bidiagonalizing vectors in U */ +/* (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB) */ +/* (RWorkspace: 0) */ + + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + if (wntus) { + ncu = *n; + } + if (wntua) { + ncu = *m; + } + i__2 = *lwork - iwork + 1; + zungbr_("Q", m, &ncu, n, &u[u_offset], ldu, &work[itauq], & + work[iwork], &i__2, &ierr); + } + if (wntvas) { + +/* If right singular vectors desired in VT, copy result to */ +/* VT and generate right bidiagonalizing vectors in VT */ +/* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ +/* (RWorkspace: 0) */ + + zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - iwork + 1; + zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[iwork], &i__2, &ierr); + } + if (wntuo) { + +/* If left singular vectors desired in A, generate left */ +/* bidiagonalizing vectors in A */ +/* (CWorkspace: need 3*N, prefer 2*N+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[ + iwork], &i__2, &ierr); + } + if (wntvo) { + +/* If right singular vectors desired in A, generate right */ +/* bidiagonalizing vectors in A */ +/* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[ + iwork], &i__2, &ierr); + } + irwork = ie + *n; + if (wntuas || wntuo) { + nru = *m; + } + if (wntun) { + nru = 0; + } + if (wntvas || wntvo) { + ncvt = *n; + } + if (wntvn) { + ncvt = 0; + } + if (! wntuo && ! wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in VT */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &rwork[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, cdum, &c__1, & + rwork[irwork], info); + } else if (! wntuo && wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in A */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &rwork[ie], &a[ + a_offset], lda, &u[u_offset], ldu, cdum, &c__1, & + rwork[irwork], info); + } else { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in A and computing right singular */ +/* vectors in VT */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &rwork[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, cdum, &c__1, & + rwork[irwork], info); + } + + } + + } else { + +/* A has more columns than rows. If A has sufficiently more */ +/* columns than rows, first reduce using the LQ decomposition (if */ +/* sufficient workspace available) */ + + if (*n >= mnthr) { + + if (wntvn) { + +/* Path 1t(N much larger than M, JOBVT='N') */ +/* No right singular vectors to be computed */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (CWorkspace: need 2*M, prefer M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], & + i__2, &ierr); + +/* Zero out above L */ + + i__2 = *m - 1; + i__3 = *m - 1; + zlaset_("U", &i__2, &i__3, &c_b1, &c_b1, &a[(a_dim1 << 1) + 1] + , lda); + ie = 1; + itauq = 1; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in A */ +/* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) */ +/* (RWorkspace: need M) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + if (wntuo || wntuas) { + +/* If left singular vectors desired, generate Q */ +/* (CWorkspace: need 3*M, prefer 2*M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], & + work[iwork], &i__2, &ierr); + } + irwork = ie + *m; + nru = 0; + if (wntuo || wntuas) { + nru = *m; + } + +/* Perform bidiagonal QR iteration, computing left singular */ +/* vectors of A in A if desired */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", m, &c__0, &nru, &c__0, &s[1], &rwork[ie], cdum, & + c__1, &a[a_offset], lda, cdum, &c__1, &rwork[irwork], + info); + +/* If left singular vectors desired in U, copy them there */ + + if (wntuas) { + zlacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); + } + + } else if (wntvo && wntun) { + +/* Path 2t(N much larger than M, JOBU='N', JOBVT='O') */ +/* M right singular vectors to be overwritten on A and */ +/* no left singular vectors to be computed */ + + if (*lwork >= *m * *m + *m * 3) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n; + if (*lwork >= f2cmax(i__2,i__3) + *lda * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n; + if (*lwork >= f2cmax(i__2,i__3) + *m * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is M by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by CHUNK and WORK(IR) is M by M */ + + ldwrku = *m; + chunk = (*lwork - *m * *m) / *m; + ldwrkr = *m; + } + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__2, &ierr); + +/* Copy L to WORK(IR) and zero out above it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *m - 1; + i__3 = *m - 1; + zlaset_("U", &i__2, &i__3, &c_b1, &c_b1, &work[ir + + ldwrkr], &ldwrkr); + +/* Generate Q in A */ +/* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IR) */ +/* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) */ +/* (RWorkspace: need M) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], &i__2, & + ierr); + +/* Generate right vectors bidiagonalizing L */ +/* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], & + work[iwork], &i__2, &ierr); + irwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of L in WORK(IR) */ +/* (CWorkspace: need M*M) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", m, m, &c__0, &c__0, &s[1], &rwork[ie], &work[ + ir], &ldwrkr, cdum, &c__1, cdum, &c__1, &rwork[ + irwork], info); + iu = itauq; + +/* Multiply right singular vectors of L in WORK(IR) by Q */ +/* in A, storing result in WORK(IU) and copying to A */ +/* (CWorkspace: need M*M+M, prefer M*M+M*N) */ +/* (RWorkspace: 0) */ + + i__2 = *n; + i__3 = chunk; + for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__3) { +/* Computing MIN */ + i__4 = *n - i__ + 1; + blk = f2cmin(i__4,chunk); + zgemm_("N", "N", m, &blk, m, &c_b2, &work[ir], & + ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b1, & + work[iu], &ldwrku); + zlacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * + a_dim1 + 1], lda); +/* L30: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + ie = 1; + itauq = 1; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize A */ +/* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) */ +/* (RWorkspace: need M) */ + + i__3 = *lwork - iwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__3, &ierr); + +/* Generate right vectors bidiagonalizing A */ +/* (CWorkspace: need 3*M, prefer 2*M+M*NB) */ +/* (RWorkspace: 0) */ + + i__3 = *lwork - iwork + 1; + zungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[iwork], &i__3, &ierr); + irwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in A */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("L", m, n, &c__0, &c__0, &s[1], &rwork[ie], &a[ + a_offset], lda, cdum, &c__1, cdum, &c__1, &rwork[ + irwork], info); + + } + + } else if (wntvo && wntuas) { + +/* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') */ +/* M right singular vectors to be overwritten on A and */ +/* M left singular vectors to be computed in U */ + + if (*lwork >= *m * *m + *m * 3) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n; + if (*lwork >= f2cmax(i__3,i__2) + *lda * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n; + if (*lwork >= f2cmax(i__3,i__2) + *m * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is M by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by CHUNK and WORK(IR) is M by M */ + + ldwrku = *m; + chunk = (*lwork - *m * *m) / *m; + ldwrkr = *m; + } + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */ +/* (RWorkspace: 0) */ + + i__3 = *lwork - iwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__3, &ierr); + +/* Copy L to U, zeroing about above it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__3 = *m - 1; + i__2 = *m - 1; + zlaset_("U", &i__3, &i__2, &c_b1, &c_b1, &u[(u_dim1 << 1) + + 1], ldu); + +/* Generate Q in A */ +/* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */ +/* (RWorkspace: 0) */ + + i__3 = *lwork - iwork + 1; + zunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__3, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U, copying result to WORK(IR) */ +/* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) */ +/* (RWorkspace: need M) */ + + i__3 = *lwork - iwork + 1; + zgebrd_(m, m, &u[u_offset], ldu, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__3, &ierr); + zlacpy_("U", m, m, &u[u_offset], ldu, &work[ir], &ldwrkr); + +/* Generate right vectors bidiagonalizing L in WORK(IR) */ +/* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB) */ +/* (RWorkspace: 0) */ + + i__3 = *lwork - iwork + 1; + zungbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], & + work[iwork], &i__3, &ierr); + +/* Generate left vectors bidiagonalizing L in U */ +/* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) */ +/* (RWorkspace: 0) */ + + i__3 = *lwork - iwork + 1; + zungbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], & + work[iwork], &i__3, &ierr); + irwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in U, and computing right */ +/* singular vectors of L in WORK(IR) */ +/* (CWorkspace: need M*M) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", m, m, m, &c__0, &s[1], &rwork[ie], &work[ir], + &ldwrkr, &u[u_offset], ldu, cdum, &c__1, &rwork[ + irwork], info); + iu = itauq; + +/* Multiply right singular vectors of L in WORK(IR) by Q */ +/* in A, storing result in WORK(IU) and copying to A */ +/* (CWorkspace: need M*M+M, prefer M*M+M*N)) */ +/* (RWorkspace: 0) */ + + i__3 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ += + i__2) { +/* Computing MIN */ + i__4 = *n - i__ + 1; + blk = f2cmin(i__4,chunk); + zgemm_("N", "N", m, &blk, m, &c_b2, &work[ir], & + ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b1, & + work[iu], &ldwrku); + zlacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * + a_dim1 + 1], lda); +/* L40: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (CWorkspace: need 2*M, prefer M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__2, &ierr); + +/* Copy L to U, zeroing out above it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *m - 1; + i__3 = *m - 1; + zlaset_("U", &i__2, &i__3, &c_b1, &c_b1, &u[(u_dim1 << 1) + + 1], ldu); + +/* Generate Q in A */ +/* (CWorkspace: need 2*M, prefer M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U */ +/* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) */ +/* (RWorkspace: need M) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, m, &u[u_offset], ldu, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + +/* Multiply right vectors bidiagonalizing L by Q in A */ +/* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunmbr_("P", "L", "C", m, n, m, &u[u_offset], ldu, &work[ + itaup], &a[a_offset], lda, &work[iwork], &i__2, & + ierr); + +/* Generate left vectors bidiagonalizing L in U */ +/* (CWorkspace: need 3*M, prefer 2*M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], & + work[iwork], &i__2, &ierr); + irwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in A */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", m, n, m, &c__0, &s[1], &rwork[ie], &a[ + a_offset], lda, &u[u_offset], ldu, cdum, &c__1, & + rwork[irwork], info); + + } + + } else if (wntvs) { + + if (wntun) { + +/* Path 4t(N much larger than M, JOBU='N', JOBVT='S') */ +/* M right singular vectors to be computed in VT and */ +/* no left singular vectors to be computed */ + + if (*lwork >= *m * *m + *m * 3) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IR) is LDA by M */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is M by M */ + + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy L to WORK(IR), zeroing out above it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &work[ir], & + ldwrkr); + i__2 = *m - 1; + i__3 = *m - 1; + zlaset_("U", &i__2, &i__3, &c_b1, &c_b1, &work[ir + + ldwrkr], &ldwrkr); + +/* Generate Q in A */ +/* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunglq_(m, n, m, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IR) */ +/* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) */ +/* (RWorkspace: need M) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate right vectors bidiagonalizing L in */ +/* WORK(IR) */ +/* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup] + , &work[iwork], &i__2, &ierr); + irwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of L in WORK(IR) */ +/* (CWorkspace: need M*M) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", m, m, &c__0, &c__0, &s[1], &rwork[ie], & + work[ir], &ldwrkr, cdum, &c__1, cdum, &c__1, & + rwork[irwork], info); + +/* Multiply right singular vectors of L in WORK(IR) by */ +/* Q in A, storing result in VT */ +/* (CWorkspace: need M*M) */ +/* (RWorkspace: 0) */ + + zgemm_("N", "N", m, n, m, &c_b2, &work[ir], &ldwrkr, & + a[a_offset], lda, &c_b1, &vt[vt_offset], ldvt); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (CWorkspace: need 2*M, prefer M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy result to VT */ + + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (CWorkspace: need 2*M, prefer M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + zlaset_("U", &i__2, &i__3, &c_b1, &c_b1, &a[(a_dim1 << + 1) + 1], lda); + +/* Bidiagonalize L in A */ +/* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) */ +/* (RWorkspace: need M) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right vectors bidiagonalizing L by Q in VT */ +/* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunmbr_("P", "L", "C", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + irwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in VT */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", m, n, &c__0, &c__0, &s[1], &rwork[ie], & + vt[vt_offset], ldvt, cdum, &c__1, cdum, &c__1, + &rwork[irwork], info); + + } + + } else if (wntuo) { + +/* Path 5t(N much larger than M, JOBU='O', JOBVT='S') */ +/* M right singular vectors to be computed in VT and */ +/* M left singular vectors to be overwritten on A */ + + if (*lwork >= (*m << 1) * *m + *m * 3) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is LDA by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *m) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is M by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by M and WORK(IR) is M by M */ + + ldwrku = *m; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out below it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *m - 1; + i__3 = *m - 1; + zlaset_("U", &i__2, &i__3, &c_b1, &c_b1, &work[iu + + ldwrku], &ldwrku); + +/* Generate Q in A */ +/* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunglq_(m, n, m, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (CWorkspace: need 2*M*M+3*M, */ +/* prefer 2*M*M+2*M+2*M*NB) */ +/* (RWorkspace: need M) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, m, &work[iu], &ldwrku, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + zlacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], & + ldwrkr); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (CWorkspace: need 2*M*M+3*M-1, */ +/* prefer 2*M*M+2*M+(M-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] + , &work[iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in WORK(IR) */ +/* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq] + , &work[iwork], &i__2, &ierr); + irwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in WORK(IR) and computing */ +/* right singular vectors of L in WORK(IU) */ +/* (CWorkspace: need 2*M*M) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", m, m, m, &c__0, &s[1], &rwork[ie], &work[ + iu], &ldwrku, &work[ir], &ldwrkr, cdum, &c__1, + &rwork[irwork], info); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in A, storing result in VT */ +/* (CWorkspace: need M*M) */ +/* (RWorkspace: 0) */ + + zgemm_("N", "N", m, n, m, &c_b2, &work[iu], &ldwrku, & + a[a_offset], lda, &c_b1, &vt[vt_offset], ldvt); + +/* Copy left singular vectors of L to A */ +/* (CWorkspace: need M*M) */ +/* (RWorkspace: 0) */ + + zlacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], + lda); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (CWorkspace: need 2*M, prefer M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (CWorkspace: need 2*M, prefer M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + zlaset_("U", &i__2, &i__3, &c_b1, &c_b1, &a[(a_dim1 << + 1) + 1], lda); + +/* Bidiagonalize L in A */ +/* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) */ +/* (RWorkspace: need M) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right vectors bidiagonalizing L by Q in VT */ +/* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunmbr_("P", "L", "C", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors of L in A */ +/* (CWorkspace: need 3*M, prefer 2*M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], + &work[iwork], &i__2, &ierr); + irwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in A and computing right */ +/* singular vectors of A in VT */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", m, n, m, &c__0, &s[1], &rwork[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, cdum, & + c__1, &rwork[irwork], info); + + } + + } else if (wntuas) { + +/* Path 6t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='S') */ +/* M right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + + if (*lwork >= *m * *m + *m * 3) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IU) is LDA by N */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is LDA by M */ + + ldwrku = *m; + } + itau = iu + ldwrku * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out above it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *m - 1; + i__3 = *m - 1; + zlaset_("U", &i__2, &i__3, &c_b1, &c_b1, &work[iu + + ldwrku], &ldwrku); + +/* Generate Q in A */ +/* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunglq_(m, n, m, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to U */ +/* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) */ +/* (RWorkspace: need M) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, m, &work[iu], &ldwrku, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + zlacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], + ldu); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (CWorkspace: need M*M+3*M-1, */ +/* prefer M*M+2*M+(M-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] + , &work[iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in U */ +/* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr); + irwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in U and computing right */ +/* singular vectors of L in WORK(IU) */ +/* (CWorkspace: need M*M) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", m, m, m, &c__0, &s[1], &rwork[ie], &work[ + iu], &ldwrku, &u[u_offset], ldu, cdum, &c__1, + &rwork[irwork], info); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in A, storing result in VT */ +/* (CWorkspace: need M*M) */ +/* (RWorkspace: 0) */ + + zgemm_("N", "N", m, n, m, &c_b2, &work[iu], &ldwrku, & + a[a_offset], lda, &c_b1, &vt[vt_offset], ldvt); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (CWorkspace: need 2*M, prefer M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (CWorkspace: need 2*M, prefer M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to U, zeroing out above it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], + ldu); + i__2 = *m - 1; + i__3 = *m - 1; + zlaset_("U", &i__2, &i__3, &c_b1, &c_b1, &u[(u_dim1 << + 1) + 1], ldu); + ie = 1; + itauq = itau; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U */ +/* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) */ +/* (RWorkspace: need M) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, m, &u[u_offset], ldu, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in U by Q */ +/* in VT */ +/* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunmbr_("P", "L", "C", m, n, m, &u[u_offset], ldu, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in U */ +/* (CWorkspace: need 3*M, prefer 2*M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr); + irwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", m, n, m, &c__0, &s[1], &rwork[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, cdum, & + c__1, &rwork[irwork], info); + + } + + } + + } else if (wntva) { + + if (wntun) { + +/* Path 7t(N much larger than M, JOBU='N', JOBVT='A') */ +/* N right singular vectors to be computed in VT and */ +/* no left singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *m * 3; + if (*lwork >= *m * *m + f2cmax(i__2,i__3)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IR) is LDA by M */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is M by M */ + + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Copy L to WORK(IR), zeroing out above it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &work[ir], & + ldwrkr); + i__2 = *m - 1; + i__3 = *m - 1; + zlaset_("U", &i__2, &i__3, &c_b1, &c_b1, &work[ir + + ldwrkr], &ldwrkr); + +/* Generate Q in VT */ +/* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IR) */ +/* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) */ +/* (RWorkspace: need M) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate right bidiagonalizing vectors in WORK(IR) */ +/* (CWorkspace: need M*M+3*M-1, */ +/* prefer M*M+2*M+(M-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup] + , &work[iwork], &i__2, &ierr); + irwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of L in WORK(IR) */ +/* (CWorkspace: need M*M) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", m, m, &c__0, &c__0, &s[1], &rwork[ie], & + work[ir], &ldwrkr, cdum, &c__1, cdum, &c__1, & + rwork[irwork], info); + +/* Multiply right singular vectors of L in WORK(IR) by */ +/* Q in VT, storing result in A */ +/* (CWorkspace: need M*M) */ +/* (RWorkspace: 0) */ + + zgemm_("N", "N", m, n, m, &c_b2, &work[ir], &ldwrkr, & + vt[vt_offset], ldvt, &c_b1, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (CWorkspace: need 2*M, prefer M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (CWorkspace: need M+N, prefer M+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + zlaset_("U", &i__2, &i__3, &c_b1, &c_b1, &a[(a_dim1 << + 1) + 1], lda); + +/* Bidiagonalize L in A */ +/* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) */ +/* (RWorkspace: need M) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in A by Q */ +/* in VT */ +/* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunmbr_("P", "L", "C", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + irwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in VT */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", m, n, &c__0, &c__0, &s[1], &rwork[ie], & + vt[vt_offset], ldvt, cdum, &c__1, cdum, &c__1, + &rwork[irwork], info); + + } + + } else if (wntuo) { + +/* Path 8t(N much larger than M, JOBU='O', JOBVT='A') */ +/* N right singular vectors to be computed in VT and */ +/* M left singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *m * 3; + if (*lwork >= (*m << 1) * *m + f2cmax(i__2,i__3)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is LDA by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *m) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is M by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by M and WORK(IR) is M by M */ + + ldwrku = *m; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out above it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *m - 1; + i__3 = *m - 1; + zlaset_("U", &i__2, &i__3, &c_b1, &c_b1, &work[iu + + ldwrku], &ldwrku); + ie = 1; + itauq = itau; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (CWorkspace: need 2*M*M+3*M, */ +/* prefer 2*M*M+2*M+2*M*NB) */ +/* (RWorkspace: need M) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, m, &work[iu], &ldwrku, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + zlacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], & + ldwrkr); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (CWorkspace: need 2*M*M+3*M-1, */ +/* prefer 2*M*M+2*M+(M-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] + , &work[iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in WORK(IR) */ +/* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq] + , &work[iwork], &i__2, &ierr); + irwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in WORK(IR) and computing */ +/* right singular vectors of L in WORK(IU) */ +/* (CWorkspace: need 2*M*M) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", m, m, m, &c__0, &s[1], &rwork[ie], &work[ + iu], &ldwrku, &work[ir], &ldwrkr, cdum, &c__1, + &rwork[irwork], info); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in VT, storing result in A */ +/* (CWorkspace: need M*M) */ +/* (RWorkspace: 0) */ + + zgemm_("N", "N", m, n, m, &c_b2, &work[iu], &ldwrku, & + vt[vt_offset], ldvt, &c_b1, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Copy left singular vectors of A from WORK(IR) to A */ + + zlacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], + lda); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (CWorkspace: need 2*M, prefer M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (CWorkspace: need M+N, prefer M+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + zlaset_("U", &i__2, &i__3, &c_b1, &c_b1, &a[(a_dim1 << + 1) + 1], lda); + +/* Bidiagonalize L in A */ +/* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) */ +/* (RWorkspace: need M) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in A by Q */ +/* in VT */ +/* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunmbr_("P", "L", "C", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in A */ +/* (CWorkspace: need 3*M, prefer 2*M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], + &work[iwork], &i__2, &ierr); + irwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in A and computing right */ +/* singular vectors of A in VT */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", m, n, m, &c__0, &s[1], &rwork[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, cdum, & + c__1, &rwork[irwork], info); + + } + + } else if (wntuas) { + +/* Path 9t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='A') */ +/* N right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *m * 3; + if (*lwork >= *m * *m + f2cmax(i__2,i__3)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IU) is LDA by M */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is M by M */ + + ldwrku = *m; + } + itau = iu + ldwrku * *m; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out above it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *m - 1; + i__3 = *m - 1; + zlaset_("U", &i__2, &i__3, &c_b1, &c_b1, &work[iu + + ldwrku], &ldwrku); + ie = 1; + itauq = itau; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to U */ +/* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) */ +/* (RWorkspace: need M) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, m, &work[iu], &ldwrku, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + zlacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], + ldu); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] + , &work[iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in U */ +/* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr); + irwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in U and computing right */ +/* singular vectors of L in WORK(IU) */ +/* (CWorkspace: need M*M) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", m, m, m, &c__0, &s[1], &rwork[ie], &work[ + iu], &ldwrku, &u[u_offset], ldu, cdum, &c__1, + &rwork[irwork], info); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in VT, storing result in A */ +/* (CWorkspace: need M*M) */ +/* (RWorkspace: 0) */ + + zgemm_("N", "N", m, n, m, &c_b2, &work[iu], &ldwrku, & + vt[vt_offset], ldvt, &c_b1, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (CWorkspace: need 2*M, prefer M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (CWorkspace: need M+N, prefer M+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to U, zeroing out above it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], + ldu); + i__2 = *m - 1; + i__3 = *m - 1; + zlaset_("U", &i__2, &i__3, &c_b1, &c_b1, &u[(u_dim1 << + 1) + 1], ldu); + ie = 1; + itauq = itau; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U */ +/* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) */ +/* (RWorkspace: need M) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, m, &u[u_offset], ldu, &s[1], &rwork[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in U by Q */ +/* in VT */ +/* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zunmbr_("P", "L", "C", m, n, m, &u[u_offset], ldu, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in U */ +/* (CWorkspace: need 3*M, prefer 2*M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr); + irwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("U", m, n, m, &c__0, &s[1], &rwork[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, cdum, & + c__1, &rwork[irwork], info); + + } + + } + + } + + } else { + +/* N .LT. MNTHR */ + +/* Path 10t(N greater than M, but not much larger) */ +/* Reduce to bidiagonal form without LQ decomposition */ + + ie = 1; + itauq = 1; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize A */ +/* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) */ +/* (RWorkspace: M) */ + + i__2 = *lwork - iwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[iwork], &i__2, &ierr); + if (wntuas) { + +/* If left singular vectors desired in U, copy result to U */ +/* and generate left bidiagonalizing vectors in U */ +/* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB) */ +/* (RWorkspace: 0) */ + + zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *lwork - iwork + 1; + zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + iwork], &i__2, &ierr); + } + if (wntvas) { + +/* If right singular vectors desired in VT, copy result to */ +/* VT and generate right bidiagonalizing vectors in VT */ +/* (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB) */ +/* (RWorkspace: 0) */ + + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + if (wntva) { + nrvt = *n; + } + if (wntvs) { + nrvt = *m; + } + i__2 = *lwork - iwork + 1; + zungbr_("P", &nrvt, n, m, &vt[vt_offset], ldvt, &work[itaup], + &work[iwork], &i__2, &ierr); + } + if (wntuo) { + +/* If left singular vectors desired in A, generate left */ +/* bidiagonalizing vectors in A */ +/* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("Q", m, m, n, &a[a_offset], lda, &work[itauq], &work[ + iwork], &i__2, &ierr); + } + if (wntvo) { + +/* If right singular vectors desired in A, generate right */ +/* bidiagonalizing vectors in A */ +/* (CWorkspace: need 3*M, prefer 2*M+M*NB) */ +/* (RWorkspace: 0) */ + + i__2 = *lwork - iwork + 1; + zungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ + iwork], &i__2, &ierr); + } + irwork = ie + *m; + if (wntuas || wntuo) { + nru = *m; + } + if (wntun) { + nru = 0; + } + if (wntvas || wntvo) { + ncvt = *n; + } + if (wntvn) { + ncvt = 0; + } + if (! wntuo && ! wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in VT */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &rwork[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, cdum, &c__1, & + rwork[irwork], info); + } else if (! wntuo && wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in A */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &rwork[ie], &a[ + a_offset], lda, &u[u_offset], ldu, cdum, &c__1, & + rwork[irwork], info); + } else { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in A and computing right singular */ +/* vectors in VT */ +/* (CWorkspace: 0) */ +/* (RWorkspace: need BDSPAC) */ + + zbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &rwork[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, cdum, &c__1, & + rwork[irwork], info); + } + + } + + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (*info != 0 && anrm > bignum) { + i__2 = minmn - 1; + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &i__2, &c__1, &rwork[ + ie], &minmn, &ierr); + } + if (anrm < smlnum) { + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (*info != 0 && anrm < smlnum) { + i__2 = minmn - 1; + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__2, &c__1, &rwork[ + ie], &minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + + return 0; + +/* End of ZGESVD */ + +} /* zgesvd_ */ + diff --git a/lapack-netlib/SRC/zgesvdq.c b/lapack-netlib/SRC/zgesvdq.c new file mode 100644 index 000000000..bcc7767b6 --- /dev/null +++ b/lapack-netlib/SRC/zgesvdq.c @@ -0,0 +1,2207 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGESVDQ computes the singular value decomposition (SVD) with a QR-Preconditioned QR SVD Method + for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGESVDQ + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, */ +/* S, U, LDU, V, LDV, NUMRANK, IWORK, LIWORK, */ +/* CWORK, LCWORK, RWORK, LRWORK, INFO ) */ + +/* IMPLICIT NONE */ +/* CHARACTER JOBA, JOBP, JOBR, JOBU, JOBV */ +/* INTEGER M, N, LDA, LDU, LDV, NUMRANK, LIWORK, LCWORK, LRWORK, */ +/* INFO */ +/* COMPLEX*16 A( LDA, * ), U( LDU, * ), V( LDV, * ), CWORK( * ) */ +/* DOUBLE PRECISION S( * ), RWORK( * ) */ +/* INTEGER IWORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* ZCGESVDQ computes the singular value decomposition (SVD) of a complex */ +/* > M-by-N matrix A, where M >= N. The SVD of A is written as */ +/* > [++] [xx] [x0] [xx] */ +/* > A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] */ +/* > [++] [xx] */ +/* > where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal */ +/* > matrix, and V is an N-by-N unitary matrix. The diagonal elements */ +/* > of SIGMA are the singular values of A. The columns of U and V are the */ +/* > left and the right singular vectors of A, respectively. */ +/* > \endverbatim */ + +/* Arguments */ +/* ========= */ + +/* > \param[in] JOBA */ +/* > \verbatim */ +/* > JOBA is CHARACTER*1 */ +/* > Specifies the level of accuracy in the computed SVD */ +/* > = 'A' The requested accuracy corresponds to having the backward */ +/* > error bounded by || delta A ||_F <= f(m,n) * EPS * || A ||_F, */ +/* > where EPS = DLAMCH('Epsilon'). This authorises ZGESVDQ to */ +/* > truncate the computed triangular factor in a rank revealing */ +/* > QR factorization whenever the truncated part is below the */ +/* > threshold of the order of EPS * ||A||_F. This is aggressive */ +/* > truncation level. */ +/* > = 'M' Similarly as with 'A', but the truncation is more gentle: it */ +/* > is allowed only when there is a drop on the diagonal of the */ +/* > triangular factor in the QR factorization. This is medium */ +/* > truncation level. */ +/* > = 'H' High accuracy requested. No numerical rank determination based */ +/* > on the rank revealing QR factorization is attempted. */ +/* > = 'E' Same as 'H', and in addition the condition number of column */ +/* > scaled A is estimated and returned in RWORK(1). */ +/* > N^(-1/4)*RWORK(1) <= ||pinv(A_scaled)||_2 <= N^(1/4)*RWORK(1) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBP */ +/* > \verbatim */ +/* > JOBP is CHARACTER*1 */ +/* > = 'P' The rows of A are ordered in decreasing order with respect to */ +/* > ||A(i,:)||_\infty. This enhances numerical accuracy at the cost */ +/* > of extra data movement. Recommended for numerical robustness. */ +/* > = 'N' No row pivoting. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBR */ +/* > \verbatim */ +/* > JOBR is CHARACTER*1 */ +/* > = 'T' After the initial pivoted QR factorization, ZGESVD is applied to */ +/* > the adjoint R**H of the computed triangular factor R. This involves */ +/* > some extra data movement (matrix transpositions). Useful for */ +/* > experiments, research and development. */ +/* > = 'N' The triangular factor R is given as input to CGESVD. This may be */ +/* > preferred as it involves less data movement. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > = 'A' All M left singular vectors are computed and returned in the */ +/* > matrix U. See the description of U. */ +/* > = 'S' or 'U' N = f2cmin(M,N) left singular vectors are computed and returned */ +/* > in the matrix U. See the description of U. */ +/* > = 'R' Numerical rank NUMRANK is determined and only NUMRANK left singular */ +/* > vectors are computed and returned in the matrix U. */ +/* > = 'F' The N left singular vectors are returned in factored form as the */ +/* > product of the Q factor from the initial QR factorization and the */ +/* > N left singular vectors of (R**H , 0)**H. If row pivoting is used, */ +/* > then the necessary information on the row pivoting is stored in */ +/* > IWORK(N+1:N+M-1). */ +/* > = 'N' The left singular vectors are not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > = 'A', 'V' All N right singular vectors are computed and returned in */ +/* > the matrix V. */ +/* > = 'R' Numerical rank NUMRANK is determined and only NUMRANK right singular */ +/* > vectors are computed and returned in the matrix V. This option is */ +/* > allowed only if JOBU = 'R' or JOBU = 'N'; otherwise it is illegal. */ +/* > = 'N' The right singular vectors are not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array of dimensions LDA x N */ +/* > On entry, the input matrix A. */ +/* > On exit, if JOBU .NE. 'N' or JOBV .NE. 'N', the lower triangle of A contains */ +/* > the Householder vectors as stored by ZGEQP3. If JOBU = 'F', these Householder */ +/* > vectors together with CWORK(1:N) can be used to restore the Q factors from */ +/* > the initial pivoted QR factorization of A. See the description of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER. */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array of dimension N. */ +/* > The singular values of A, ordered so that S(i) >= S(i+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is COMPLEX*16 array, dimension */ +/* > LDU x M if JOBU = 'A'; see the description of LDU. In this case, */ +/* > on exit, U contains the M left singular vectors. */ +/* > LDU x N if JOBU = 'S', 'U', 'R' ; see the description of LDU. In this */ +/* > case, U contains the leading N or the leading NUMRANK left singular vectors. */ +/* > LDU x N if JOBU = 'F' ; see the description of LDU. In this case U */ +/* > contains N x N unitary matrix that can be used to form the left */ +/* > singular vectors. */ +/* > If JOBU = 'N', U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER. */ +/* > The leading dimension of the array U. */ +/* > If JOBU = 'A', 'S', 'U', 'R', LDU >= f2cmax(1,M). */ +/* > If JOBU = 'F', LDU >= f2cmax(1,N). */ +/* > Otherwise, LDU >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] V */ +/* > \verbatim */ +/* > V is COMPLEX*16 array, dimension */ +/* > LDV x N if JOBV = 'A', 'V', 'R' or if JOBA = 'E' . */ +/* > If JOBV = 'A', or 'V', V contains the N-by-N unitary matrix V**H; */ +/* > If JOBV = 'R', V contains the first NUMRANK rows of V**H (the right */ +/* > singular vectors, stored rowwise, of the NUMRANK largest singular values). */ +/* > If JOBV = 'N' and JOBA = 'E', V is used as a workspace. */ +/* > If JOBV = 'N', and JOBA.NE.'E', V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. */ +/* > If JOBV = 'A', 'V', 'R', or JOBA = 'E', LDV >= f2cmax(1,N). */ +/* > Otherwise, LDV >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] NUMRANK */ +/* > \verbatim */ +/* > NUMRANK is INTEGER */ +/* > NUMRANK is the numerical rank first determined after the rank */ +/* > revealing QR factorization, following the strategy specified by the */ +/* > value of JOBA. If JOBV = 'R' and JOBU = 'R', only NUMRANK */ +/* > leading singular values and vectors are then requested in the call */ +/* > of CGESVD. The final value of NUMRANK might be further reduced if */ +/* > some singular values are computed as zeros. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (f2cmax(1, LIWORK)). */ +/* > On exit, IWORK(1:N) contains column pivoting permutation of the */ +/* > rank revealing QR factorization. */ +/* > If JOBP = 'P', IWORK(N+1:N+M-1) contains the indices of the sequence */ +/* > of row swaps used in row pivoting. These can be used to restore the */ +/* > left singular vectors in the case JOBU = 'F'. */ + +/* > If LIWORK, LCWORK, or LRWORK = -1, then on exit, if INFO = 0, */ +/* > LIWORK(1) returns the minimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > LIWORK >= N + M - 1, if JOBP = 'P'; */ +/* > LIWORK >= N if JOBP = 'N'. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates and returns the optimal and minimal sizes */ +/* > for the CWORK, IWORK, and RWORK arrays, and no error */ +/* > message related to LCWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] CWORK */ +/* > \verbatim */ +/* > CWORK is COMPLEX*12 array, dimension (f2cmax(2, LCWORK)), used as a workspace. */ +/* > On exit, if, on entry, LCWORK.NE.-1, CWORK(1:N) contains parameters */ +/* > needed to recover the Q factor from the QR factorization computed by */ +/* > ZGEQP3. */ + +/* > If LIWORK, LCWORK, or LRWORK = -1, then on exit, if INFO = 0, */ +/* > CWORK(1) returns the optimal LCWORK, and */ +/* > CWORK(2) returns the minimal LCWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] LCWORK */ +/* > \verbatim */ +/* > LCWORK is INTEGER */ +/* > The dimension of the array CWORK. It is determined as follows: */ +/* > Let LWQP3 = N+1, LWCON = 2*N, and let */ +/* > LWUNQ = { MAX( N, 1 ), if JOBU = 'R', 'S', or 'U' */ +/* > { MAX( M, 1 ), if JOBU = 'A' */ +/* > LWSVD = MAX( 3*N, 1 ) */ +/* > LWLQF = MAX( N/2, 1 ), LWSVD2 = MAX( 3*(N/2), 1 ), LWUNLQ = MAX( N, 1 ), */ +/* > LWQRF = MAX( N/2, 1 ), LWUNQ2 = MAX( N, 1 ) */ +/* > Then the minimal value of LCWORK is: */ +/* > = MAX( N + LWQP3, LWSVD ) if only the singular values are needed; */ +/* > = MAX( N + LWQP3, LWCON, LWSVD ) if only the singular values are needed, */ +/* > and a scaled condition estimate requested; */ +/* > */ +/* > = N + MAX( LWQP3, LWSVD, LWUNQ ) if the singular values and the left */ +/* > singular vectors are requested; */ +/* > = N + MAX( LWQP3, LWCON, LWSVD, LWUNQ ) if the singular values and the left */ +/* > singular vectors are requested, and also */ +/* > a scaled condition estimate requested; */ +/* > */ +/* > = N + MAX( LWQP3, LWSVD ) if the singular values and the right */ +/* > singular vectors are requested; */ +/* > = N + MAX( LWQP3, LWCON, LWSVD ) if the singular values and the right */ +/* > singular vectors are requested, and also */ +/* > a scaled condition etimate requested; */ +/* > */ +/* > = N + MAX( LWQP3, LWSVD, LWUNQ ) if the full SVD is requested with JOBV = 'R'; */ +/* > independent of JOBR; */ +/* > = N + MAX( LWQP3, LWCON, LWSVD, LWUNQ ) if the full SVD is requested, */ +/* > JOBV = 'R' and, also a scaled condition */ +/* > estimate requested; independent of JOBR; */ +/* > = MAX( N + MAX( LWQP3, LWSVD, LWUNQ ), */ +/* > N + MAX( LWQP3, N/2+LWLQF, N/2+LWSVD2, N/2+LWUNLQ, LWUNQ) ) if the */ +/* > full SVD is requested with JOBV = 'A' or 'V', and */ +/* > JOBR ='N' */ +/* > = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWUNQ ), */ +/* > N + MAX( LWQP3, LWCON, N/2+LWLQF, N/2+LWSVD2, N/2+LWUNLQ, LWUNQ ) ) */ +/* > if the full SVD is requested with JOBV = 'A' or 'V', and */ +/* > JOBR ='N', and also a scaled condition number estimate */ +/* > requested. */ +/* > = MAX( N + MAX( LWQP3, LWSVD, LWUNQ ), */ +/* > N + MAX( LWQP3, N/2+LWQRF, N/2+LWSVD2, N/2+LWUNQ2, LWUNQ ) ) if the */ +/* > full SVD is requested with JOBV = 'A', 'V', and JOBR ='T' */ +/* > = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWUNQ ), */ +/* > N + MAX( LWQP3, LWCON, N/2+LWQRF, N/2+LWSVD2, N/2+LWUNQ2, LWUNQ ) ) */ +/* > if the full SVD is requested with JOBV = 'A', 'V' and */ +/* > JOBR ='T', and also a scaled condition number estimate */ +/* > requested. */ +/* > Finally, LCWORK must be at least two: LCWORK = MAX( 2, LCWORK ). */ +/* > */ +/* > If LCWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates and returns the optimal and minimal sizes */ +/* > for the CWORK, IWORK, and RWORK arrays, and no error */ +/* > message related to LCWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (f2cmax(1, LRWORK)). */ +/* > On exit, */ +/* > 1. If JOBA = 'E', RWORK(1) contains an estimate of the condition */ +/* > number of column scaled A. If A = C * D where D is diagonal and C */ +/* > has unit columns in the Euclidean norm, then, assuming full column rank, */ +/* > N^(-1/4) * RWORK(1) <= ||pinv(C)||_2 <= N^(1/4) * RWORK(1). */ +/* > Otherwise, RWORK(1) = -1. */ +/* > 2. RWORK(2) contains the number of singular values computed as */ +/* > exact zeros in ZGESVD applied to the upper triangular or trapeziodal */ +/* > R (from the initial QR factorization). In case of early exit (no call to */ +/* > ZGESVD, such as in the case of zero matrix) RWORK(2) = -1. */ + +/* > If LIWORK, LCWORK, or LRWORK = -1, then on exit, if INFO = 0, */ +/* > RWORK(1) returns the minimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER. */ +/* > The dimension of the array RWORK. */ +/* > If JOBP ='P', then LRWORK >= MAX(2, M, 5*N); */ +/* > Otherwise, LRWORK >= MAX(2, 5*N). */ + +/* > If LRWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates and returns the optimal and minimal sizes */ +/* > for the CWORK, IWORK, and RWORK arrays, and no error */ +/* > message related to LCWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if ZBDSQR did not converge, INFO specifies how many superdiagonals */ +/* > of an intermediate bidiagonal form B (computed in ZGESVD) did not */ +/* > converge to zero. */ +/* > \endverbatim */ + +/* > \par Further Details: */ +/* ======================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > 1. The data movement (matrix transpose) is coded using simple nested */ +/* > DO-loops because BLAS and LAPACK do not provide corresponding subroutines. */ +/* > Those DO-loops are easily identified in this source code - by the CONTINUE */ +/* > statements labeled with 11**. In an optimized version of this code, the */ +/* > nested DO loops should be replaced with calls to an optimized subroutine. */ +/* > 2. This code scales A by 1/SQRT(M) if the largest ABS(A(i,j)) could cause */ +/* > column norm overflow. This is the minial precaution and it is left to the */ +/* > SVD routine (CGESVD) to do its own preemptive scaling if potential over- */ +/* > or underflows are detected. To avoid repeated scanning of the array A, */ +/* > an optimal implementation would do all necessary scaling before calling */ +/* > CGESVD and the scaling in CGESVD can be switched off. */ +/* > 3. Other comments related to code optimization are given in comments in the */ +/* > code, enlosed in [[double brackets]]. */ +/* > \endverbatim */ + +/* > \par Bugs, examples and comments */ +/* =========================== */ + +/* > \verbatim */ +/* > Please report all bugs and send interesting examples and/or comments to */ +/* > drmac@math.hr. Thank you. */ +/* > \endverbatim */ + +/* > \par References */ +/* =============== */ + +/* > \verbatim */ +/* > [1] Zlatko Drmac, Algorithm 977: A QR-Preconditioned QR SVD Method for */ +/* > Computing the SVD with High Accuracy. ACM Trans. Math. Softw. */ +/* > 44(1): 11:1-11:30 (2017) */ +/* > */ +/* > SIGMA library, xGESVDQ section updated February 2016. */ +/* > Developed and coded by Zlatko Drmac, Department of Mathematics */ +/* > University of Zagreb, Croatia, drmac@math.hr */ +/* > \endverbatim */ + + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > Developed and coded by Zlatko Drmac, Department of Mathematics */ +/* > University of Zagreb, Croatia, drmac@math.hr */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2018 */ + +/* > \ingroup complex16GEsing */ + +/* ===================================================================== */ +/* Subroutine */ int zgesvdq_(char *joba, char *jobp, char *jobr, char *jobu, + char *jobv, integer *m, integer *n, doublecomplex *a, integer *lda, + doublereal *s, doublecomplex *u, integer *ldu, doublecomplex *v, + integer *ldv, integer *numrank, integer *iwork, integer *liwork, + doublecomplex *cwork, integer *lcwork, doublereal *rwork, integer * + lrwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2, + i__3; + doublereal d__1; + doublecomplex z__1; + + /* Local variables */ + integer lwrk_zunmlq__, lwrk_zunmqr__, ierr; + doublecomplex ctmp; + integer lwrk_zgesvd2__; + doublereal rtmp; + integer lwrk_zunmqr2__, optratio; + logical lsvc0, accla; + integer lwqp3; + logical acclh, acclm; + integer p, q; + logical conda; + extern logical lsame_(char *, char *); + logical lsvec; + doublereal sfmin, epsln; + integer lwcon; + logical rsvec; + integer lwlqf, lwqrf; + logical wntua; + integer n1, lwsvd; + logical dntwu, dntwv, wntuf, wntva; + integer lwunq; + logical wntur, wntus, wntvr; + extern /* Subroutine */ int zgeqp3_(integer *, integer *, doublecomplex *, + integer *, integer *, doublecomplex *, doublecomplex *, integer * + , doublereal *, integer *); + extern doublereal dznrm2_(integer *, doublecomplex *, integer *); + integer lwsvd2, lwunq2; + extern doublereal dlamch_(char *); + integer nr; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern integer idamax_(integer *, doublereal *, integer *); + doublereal sconda; + extern /* Subroutine */ int dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *, ftnlen), zdscal_(integer *, doublereal + *, doublecomplex *, integer *); + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + extern /* Subroutine */ int zgelqf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ), zlascl_(char *, integer *, integer *, doublereal *, doublereal + *, integer *, integer *, doublecomplex *, integer *, integer *); + doublecomplex cdummy[1]; + extern /* Subroutine */ int zgeqrf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ), zgesvd_(char *, char *, integer *, integer *, doublecomplex *, + integer *, doublereal *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, integer *), zlacpy_(char *, integer + *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *), zlaset_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *); + integer minwrk; + logical rtrans; + extern /* Subroutine */ int zlapmt_(logical *, integer *, integer *, + doublecomplex *, integer *, integer *), zpocon_(char *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, doublereal *, integer *); + doublereal rdummy[1]; + logical lquery; + integer lwunlq; + extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *, + integer *, integer *, integer *, integer *); + integer optwrk; + logical rowprm; + extern /* Subroutine */ int zunmlq_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + doublereal big; + integer minwrk2; + logical ascaled; + integer optwrk2, lwrk_zgeqp3__, iminwrk, rminwrk, lwrk_zgelqf__, + lwrk_zgeqrf__, lwrk_zgesvd__; + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + --iwork; + --cwork; + --rwork; + + /* Function Body */ + wntus = lsame_(jobu, "S") || lsame_(jobu, "U"); + wntur = lsame_(jobu, "R"); + wntua = lsame_(jobu, "A"); + wntuf = lsame_(jobu, "F"); + lsvc0 = wntus || wntur || wntua; + lsvec = lsvc0 || wntuf; + dntwu = lsame_(jobu, "N"); + + wntvr = lsame_(jobv, "R"); + wntva = lsame_(jobv, "A") || lsame_(jobv, "V"); + rsvec = wntvr || wntva; + dntwv = lsame_(jobv, "N"); + + accla = lsame_(joba, "A"); + acclm = lsame_(joba, "M"); + conda = lsame_(joba, "E"); + acclh = lsame_(joba, "H") || conda; + + rowprm = lsame_(jobp, "P"); + rtrans = lsame_(jobr, "T"); + + if (rowprm) { +/* Computing MAX */ + i__1 = 1, i__2 = *n + *m - 1; + iminwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = f2cmax(2,*m), i__2 = *n * 5; + rminwrk = f2cmax(i__1,i__2); + } else { + iminwrk = f2cmax(1,*n); +/* Computing MAX */ + i__1 = 2, i__2 = *n * 5; + rminwrk = f2cmax(i__1,i__2); + } + lquery = *liwork == -1 || *lcwork == -1 || *lrwork == -1; + *info = 0; + if (! (accla || acclm || acclh)) { + *info = -1; + } else if (! (rowprm || lsame_(jobp, "N"))) { + *info = -2; + } else if (! (rtrans || lsame_(jobr, "N"))) { + *info = -3; + } else if (! (lsvec || dntwu)) { + *info = -4; + } else if (wntur && wntva) { + *info = -5; + } else if (! (rsvec || dntwv)) { + *info = -5; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || *n > *m) { + *info = -7; + } else if (*lda < f2cmax(1,*m)) { + *info = -9; + } else if (*ldu < 1 || lsvc0 && *ldu < *m || wntuf && *ldu < *n) { + *info = -12; + } else if (*ldv < 1 || rsvec && *ldv < *n || conda && *ldv < *n) { + *info = -14; + } else if (*liwork < iminwrk && ! lquery) { + *info = -17; + } + + + if (*info == 0) { +/* [[The expressions for computing the minimal and the optimal */ +/* values of LCWORK are written with a lot of redundancy and */ +/* can be simplified. However, this detailed form is easier for */ +/* maintenance and modifications of the code.]] */ + + lwqp3 = *n + 1; + if (wntus || wntur) { + lwunq = f2cmax(*n,1); + } else if (wntua) { + lwunq = f2cmax(*m,1); + } + lwcon = *n << 1; +/* Computing MAX */ + i__1 = *n * 3; + lwsvd = f2cmax(i__1,1); + if (lquery) { + zgeqp3_(m, n, &a[a_offset], lda, &iwork[1], cdummy, cdummy, &c_n1, + rdummy, &ierr); + lwrk_zgeqp3__ = (integer) cdummy[0].r; + if (wntus || wntur) { + zunmqr_("L", "N", m, n, n, &a[a_offset], lda, cdummy, &u[ + u_offset], ldu, cdummy, &c_n1, &ierr); + lwrk_zunmqr__ = (integer) cdummy[0].r; + } else if (wntua) { + zunmqr_("L", "N", m, m, n, &a[a_offset], lda, cdummy, &u[ + u_offset], ldu, cdummy, &c_n1, &ierr); + lwrk_zunmqr__ = (integer) cdummy[0].r; + } else { + lwrk_zunmqr__ = 0; + } + } + minwrk = 2; + optwrk = 2; + if (! (lsvec || rsvec)) { +/* only the singular values are requested */ + if (conda) { +/* Computing MAX */ + i__1 = *n + lwqp3, i__1 = f2cmax(i__1,lwcon); + minwrk = f2cmax(i__1,lwsvd); + } else { +/* Computing MAX */ + i__1 = *n + lwqp3; + minwrk = f2cmax(i__1,lwsvd); + } + if (lquery) { + zgesvd_("N", "N", n, n, &a[a_offset], lda, &s[1], &u[u_offset] + , ldu, &v[v_offset], ldv, cdummy, &c_n1, rdummy, & + ierr); + lwrk_zgesvd__ = (integer) cdummy[0].r; + if (conda) { +/* Computing MAX */ + i__1 = *n + lwrk_zgeqp3__, i__2 = *n + lwcon, i__1 = f2cmax( + i__1,i__2); + optwrk = f2cmax(i__1,lwrk_zgesvd__); + } else { +/* Computing MAX */ + i__1 = *n + lwrk_zgeqp3__; + optwrk = f2cmax(i__1,lwrk_zgesvd__); + } + } + } else if (lsvec && ! rsvec) { +/* singular values and the left singular vectors are requested */ + if (conda) { +/* Computing MAX */ + i__1 = f2cmax(lwqp3,lwcon), i__1 = f2cmax(i__1,lwsvd); + minwrk = *n + f2cmax(i__1,lwunq); + } else { +/* Computing MAX */ + i__1 = f2cmax(lwqp3,lwsvd); + minwrk = *n + f2cmax(i__1,lwunq); + } + if (lquery) { + if (rtrans) { + zgesvd_("N", "O", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, cdummy, &c_n1, + rdummy, &ierr); + } else { + zgesvd_("O", "N", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, cdummy, &c_n1, + rdummy, &ierr); + } + lwrk_zgesvd__ = (integer) cdummy[0].r; + if (conda) { +/* Computing MAX */ + i__1 = f2cmax(lwrk_zgeqp3__,lwcon), i__1 = f2cmax(i__1, + lwrk_zgesvd__); + optwrk = *n + f2cmax(i__1,lwrk_zunmqr__); + } else { +/* Computing MAX */ + i__1 = f2cmax(lwrk_zgeqp3__,lwrk_zgesvd__); + optwrk = *n + f2cmax(i__1,lwrk_zunmqr__); + } + } + } else if (rsvec && ! lsvec) { +/* singular values and the right singular vectors are requested */ + if (conda) { +/* Computing MAX */ + i__1 = f2cmax(lwqp3,lwcon); + minwrk = *n + f2cmax(i__1,lwsvd); + } else { + minwrk = *n + f2cmax(lwqp3,lwsvd); + } + if (lquery) { + if (rtrans) { + zgesvd_("O", "N", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, cdummy, &c_n1, + rdummy, &ierr); + } else { + zgesvd_("N", "O", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, cdummy, &c_n1, + rdummy, &ierr); + } + lwrk_zgesvd__ = (integer) cdummy[0].r; + if (conda) { +/* Computing MAX */ + i__1 = f2cmax(lwrk_zgeqp3__,lwcon); + optwrk = *n + f2cmax(i__1,lwrk_zgesvd__); + } else { + optwrk = *n + f2cmax(lwrk_zgeqp3__,lwrk_zgesvd__); + } + } + } else { +/* full SVD is requested */ + if (rtrans) { +/* Computing MAX */ + i__1 = f2cmax(lwqp3,lwsvd); + minwrk = f2cmax(i__1,lwunq); + if (conda) { + minwrk = f2cmax(minwrk,lwcon); + } + minwrk += *n; + if (wntva) { +/* Computing MAX */ + i__1 = *n / 2; + lwqrf = f2cmax(i__1,1); +/* Computing MAX */ + i__1 = *n / 2 * 3; + lwsvd2 = f2cmax(i__1,1); + lwunq2 = f2cmax(*n,1); +/* Computing MAX */ + i__1 = lwqp3, i__2 = *n / 2 + lwqrf, i__1 = f2cmax(i__1,i__2) + , i__2 = *n / 2 + lwsvd2, i__1 = f2cmax(i__1,i__2), + i__2 = *n / 2 + lwunq2, i__1 = f2cmax(i__1,i__2); + minwrk2 = f2cmax(i__1,lwunq); + if (conda) { + minwrk2 = f2cmax(minwrk2,lwcon); + } + minwrk2 = *n + minwrk2; + minwrk = f2cmax(minwrk,minwrk2); + } + } else { +/* Computing MAX */ + i__1 = f2cmax(lwqp3,lwsvd); + minwrk = f2cmax(i__1,lwunq); + if (conda) { + minwrk = f2cmax(minwrk,lwcon); + } + minwrk += *n; + if (wntva) { +/* Computing MAX */ + i__1 = *n / 2; + lwlqf = f2cmax(i__1,1); +/* Computing MAX */ + i__1 = *n / 2 * 3; + lwsvd2 = f2cmax(i__1,1); + lwunlq = f2cmax(*n,1); +/* Computing MAX */ + i__1 = lwqp3, i__2 = *n / 2 + lwlqf, i__1 = f2cmax(i__1,i__2) + , i__2 = *n / 2 + lwsvd2, i__1 = f2cmax(i__1,i__2), + i__2 = *n / 2 + lwunlq, i__1 = f2cmax(i__1,i__2); + minwrk2 = f2cmax(i__1,lwunq); + if (conda) { + minwrk2 = f2cmax(minwrk2,lwcon); + } + minwrk2 = *n + minwrk2; + minwrk = f2cmax(minwrk,minwrk2); + } + } + if (lquery) { + if (rtrans) { + zgesvd_("O", "A", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, cdummy, &c_n1, + rdummy, &ierr); + lwrk_zgesvd__ = (integer) cdummy[0].r; +/* Computing MAX */ + i__1 = f2cmax(lwrk_zgeqp3__,lwrk_zgesvd__); + optwrk = f2cmax(i__1,lwrk_zunmqr__); + if (conda) { + optwrk = f2cmax(optwrk,lwcon); + } + optwrk = *n + optwrk; + if (wntva) { + i__1 = *n / 2; + zgeqrf_(n, &i__1, &u[u_offset], ldu, cdummy, cdummy, & + c_n1, &ierr); + lwrk_zgeqrf__ = (integer) cdummy[0].r; + i__1 = *n / 2; + i__2 = *n / 2; + zgesvd_("S", "O", &i__1, &i__2, &v[v_offset], ldv, &s[ + 1], &u[u_offset], ldu, &v[v_offset], ldv, + cdummy, &c_n1, rdummy, &ierr); + lwrk_zgesvd2__ = (integer) cdummy[0].r; + i__1 = *n / 2; + zunmqr_("R", "C", n, n, &i__1, &u[u_offset], ldu, + cdummy, &v[v_offset], ldv, cdummy, &c_n1, & + ierr); + lwrk_zunmqr2__ = (integer) cdummy[0].r; +/* Computing MAX */ + i__1 = lwrk_zgeqp3__, i__2 = *n / 2 + lwrk_zgeqrf__, + i__1 = f2cmax(i__1,i__2), i__2 = *n / 2 + + lwrk_zgesvd2__, i__1 = f2cmax(i__1,i__2), i__2 = + *n / 2 + lwrk_zunmqr2__; + optwrk2 = f2cmax(i__1,i__2); + if (conda) { + optwrk2 = f2cmax(optwrk2,lwcon); + } + optwrk2 = *n + optwrk2; + optwrk = f2cmax(optwrk,optwrk2); + } + } else { + zgesvd_("S", "O", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, cdummy, &c_n1, + rdummy, &ierr); + lwrk_zgesvd__ = (integer) cdummy[0].r; +/* Computing MAX */ + i__1 = f2cmax(lwrk_zgeqp3__,lwrk_zgesvd__); + optwrk = f2cmax(i__1,lwrk_zunmqr__); + if (conda) { + optwrk = f2cmax(optwrk,lwcon); + } + optwrk = *n + optwrk; + if (wntva) { + i__1 = *n / 2; + zgelqf_(&i__1, n, &u[u_offset], ldu, cdummy, cdummy, & + c_n1, &ierr); + lwrk_zgelqf__ = (integer) cdummy[0].r; + i__1 = *n / 2; + i__2 = *n / 2; + zgesvd_("S", "O", &i__1, &i__2, &v[v_offset], ldv, &s[ + 1], &u[u_offset], ldu, &v[v_offset], ldv, + cdummy, &c_n1, rdummy, &ierr); + lwrk_zgesvd2__ = (integer) cdummy[0].r; + i__1 = *n / 2; + zunmlq_("R", "N", n, n, &i__1, &u[u_offset], ldu, + cdummy, &v[v_offset], ldv, cdummy, &c_n1, & + ierr); + lwrk_zunmlq__ = (integer) cdummy[0].r; +/* Computing MAX */ + i__1 = lwrk_zgeqp3__, i__2 = *n / 2 + lwrk_zgelqf__, + i__1 = f2cmax(i__1,i__2), i__2 = *n / 2 + + lwrk_zgesvd2__, i__1 = f2cmax(i__1,i__2), i__2 = + *n / 2 + lwrk_zunmlq__; + optwrk2 = f2cmax(i__1,i__2); + if (conda) { + optwrk2 = f2cmax(optwrk2,lwcon); + } + optwrk2 = *n + optwrk2; + optwrk = f2cmax(optwrk,optwrk2); + } + } + } + } + + minwrk = f2cmax(2,minwrk); + optwrk = f2cmax(2,optwrk); + if (*lcwork < minwrk && ! lquery) { + *info = -19; + } + + } + + if (*info == 0 && *lrwork < rminwrk && ! lquery) { + *info = -21; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGESVDQ", &i__1, (ftnlen)7); + return 0; + } else if (lquery) { + +/* Return optimal workspace */ + + iwork[1] = iminwrk; + cwork[1].r = (doublereal) optwrk, cwork[1].i = 0.; + cwork[2].r = (doublereal) minwrk, cwork[2].i = 0.; + rwork[1] = (doublereal) rminwrk; + return 0; + } + +/* Quick return if the matrix is void. */ + + if (*m == 0 || *n == 0) { + return 0; + } + + big = dlamch_("O"); + ascaled = FALSE_; + if (rowprm) { +/* ell-infinity norm - this enhances numerical robustness in */ +/* the case of differently scaled rows. */ + i__1 = *m; + for (p = 1; p <= i__1; ++p) { +/* RWORK(p) = ABS( A(p,IZAMAX(N,A(p,1),LDA)) ) */ +/* [[ZLANGE will return NaN if an entry of the p-th row is Nan]] */ + rwork[p] = zlange_("M", &c__1, n, &a[p + a_dim1], lda, rdummy); + if (rwork[p] != rwork[p] || rwork[p] * 0. != 0.) { + *info = -8; + i__2 = -(*info); + xerbla_("ZGESVDQ", &i__2, (ftnlen)7); + return 0; + } +/* L1904: */ + } + i__1 = *m - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *m - p + 1; + q = idamax_(&i__2, &rwork[p], &c__1) + p - 1; + iwork[*n + p] = q; + if (p != q) { + rtmp = rwork[p]; + rwork[p] = rwork[q]; + rwork[q] = rtmp; + } +/* L1952: */ + } + + if (rwork[1] == 0.) { +/* Quick return: A is the M x N zero matrix. */ + *numrank = 0; + dlaset_("G", n, &c__1, &c_b74, &c_b74, &s[1], n); + if (wntus) { + zlaset_("G", m, n, &c_b1, &c_b2, &u[u_offset], ldu) + ; + } + if (wntua) { + zlaset_("G", m, m, &c_b1, &c_b2, &u[u_offset], ldu) + ; + } + if (wntva) { + zlaset_("G", n, n, &c_b1, &c_b2, &v[v_offset], ldv) + ; + } + if (wntuf) { + zlaset_("G", n, &c__1, &c_b1, &c_b1, &cwork[1], n); + zlaset_("G", m, n, &c_b1, &c_b2, &u[u_offset], ldu) + ; + } + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + iwork[p] = p; +/* L5001: */ + } + if (rowprm) { + i__1 = *n + *m - 1; + for (p = *n + 1; p <= i__1; ++p) { + iwork[p] = p - *n; +/* L5002: */ + } + } + if (conda) { + rwork[1] = -1.; + } + rwork[2] = -1.; + return 0; + } + + if (rwork[1] > big / sqrt((doublereal) (*m))) { +/* matrix by 1/sqrt(M) if too large entry detected */ + d__1 = sqrt((doublereal) (*m)); + zlascl_("G", &c__0, &c__0, &d__1, &c_b87, m, n, &a[a_offset], lda, + &ierr); + ascaled = TRUE_; + } + i__1 = *m - 1; + zlaswp_(n, &a[a_offset], lda, &c__1, &i__1, &iwork[*n + 1], &c__1); + } + +/* norms overflows during the QR factorization. The SVD procedure should */ +/* have its own scaling to save the singular values from overflows and */ +/* underflows. That depends on the SVD procedure. */ + + if (! rowprm) { + rtmp = zlange_("M", m, n, &a[a_offset], lda, &rwork[1]); + if (rtmp != rtmp || rtmp * 0. != 0.) { + *info = -8; + i__1 = -(*info); + xerbla_("ZGESVDQ", &i__1, (ftnlen)7); + return 0; + } + if (rtmp > big / sqrt((doublereal) (*m))) { +/* matrix by 1/sqrt(M) if too large entry detected */ + d__1 = sqrt((doublereal) (*m)); + zlascl_("G", &c__0, &c__0, &d__1, &c_b87, m, n, &a[a_offset], lda, + &ierr); + ascaled = TRUE_; + } + } + + +/* A * P = Q * [ R ] */ +/* [ 0 ] */ + + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + iwork[p] = 0; +/* L1963: */ + } + i__1 = *lcwork - *n; + zgeqp3_(m, n, &a[a_offset], lda, &iwork[1], &cwork[1], &cwork[*n + 1], & + i__1, &rwork[1], &ierr); + +/* If the user requested accuracy level allows truncation in the */ +/* computed upper triangular factor, the matrix R is examined and, */ +/* if possible, replaced with its leading upper trapezoidal part. */ + + epsln = dlamch_("E"); + sfmin = dlamch_("S"); +/* SMALL = SFMIN / EPSLN */ + nr = *n; + + if (accla) { + +/* Standard absolute error bound suffices. All sigma_i with */ +/* sigma_i < N*EPS*||A||_F are flushed to zero. This is an */ +/* aggressive enforcement of lower numerical rank by introducing a */ +/* backward error of the order of N*EPS*||A||_F. */ + nr = 1; + rtmp = sqrt((doublereal) (*n)) * epsln; + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if (z_abs(&a[p + p * a_dim1]) < rtmp * z_abs(&a[a_dim1 + 1])) { + goto L3002; + } + ++nr; +/* L3001: */ + } +L3002: + + ; + } else if (acclm) { +/* Sudden drop on the diagonal of R is used as the criterion for being */ +/* close-to-rank-deficient. The threshold is set to EPSLN=DLAMCH('E'). */ +/* [[This can be made more flexible by replacing this hard-coded value */ +/* with a user specified threshold.]] Also, the values that underflow */ +/* will be truncated. */ + nr = 1; + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if (z_abs(&a[p + p * a_dim1]) < epsln * z_abs(&a[p - 1 + (p - 1) * + a_dim1]) || z_abs(&a[p + p * a_dim1]) < sfmin) { + goto L3402; + } + ++nr; +/* L3401: */ + } +L3402: + + ; + } else { +/* obvious case of zero pivots. */ +/* R(i,i)=0 => R(i:N,i:N)=0. */ + nr = 1; + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if (z_abs(&a[p + p * a_dim1]) == 0.) { + goto L3502; + } + ++nr; +/* L3501: */ + } +L3502: + + if (conda) { +/* Estimate the scaled condition number of A. Use the fact that it is */ +/* the same as the scaled condition number of R. */ + zlacpy_("U", n, n, &a[a_offset], lda, &v[v_offset], ldv); +/* Only the leading NR x NR submatrix of the triangular factor */ +/* is considered. Only if NR=N will this give a reliable error */ +/* bound. However, even for NR < N, this can be used on an */ +/* expert level and obtain useful information in the sense of */ +/* perturbation theory. */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + rtmp = dznrm2_(&p, &v[p * v_dim1 + 1], &c__1); + d__1 = 1. / rtmp; + zdscal_(&p, &d__1, &v[p * v_dim1 + 1], &c__1); +/* L3053: */ + } + if (! (lsvec || rsvec)) { + zpocon_("U", &nr, &v[v_offset], ldv, &c_b87, &rtmp, &cwork[1], + &rwork[1], &ierr); + } else { + zpocon_("U", &nr, &v[v_offset], ldv, &c_b87, &rtmp, &cwork[*n + + 1], &rwork[1], &ierr); + } + sconda = 1. / sqrt(rtmp); +/* For NR=N, SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1), */ +/* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */ +/* See the reference [1] for more details. */ + } + + } + + if (wntur) { + n1 = nr; + } else if (wntus || wntuf) { + n1 = *n; + } else if (wntua) { + n1 = *m; + } + + if (! (rsvec || lsvec)) { +/* ....................................................................... */ +/* ....................................................................... */ + if (rtrans) { + +/* the upper triangle of [A] to zero. */ + i__1 = f2cmin(*n,nr); + for (p = 1; p <= i__1; ++p) { + i__2 = p + p * a_dim1; + d_cnjg(&z__1, &a[p + p * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = *n; + for (q = p + 1; q <= i__2; ++q) { + i__3 = q + p * a_dim1; + d_cnjg(&z__1, &a[p + q * a_dim1]); + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + if (q <= nr) { + i__3 = p + q * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; + } +/* L1147: */ + } +/* L1146: */ + } + + zgesvd_("N", "N", n, &nr, &a[a_offset], lda, &s[1], &u[u_offset], + ldu, &v[v_offset], ldv, &cwork[1], lcwork, &rwork[1], + info); + + } else { + + + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + zlaset_("L", &i__1, &i__2, &c_b1, &c_b1, &a[a_dim1 + 2], lda); + } + zgesvd_("N", "N", &nr, n, &a[a_offset], lda, &s[1], &u[u_offset], + ldu, &v[v_offset], ldv, &cwork[1], lcwork, &rwork[1], + info); + + } + + } else if (lsvec && ! rsvec) { +/* ....................................................................... */ +/* ......................................................................."""""""" */ + if (rtrans) { +/* vectors of R */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p; q <= i__2; ++q) { + i__3 = q + p * u_dim1; + d_cnjg(&z__1, &a[p + q * a_dim1]); + u[i__3].r = z__1.r, u[i__3].i = z__1.i; +/* L1193: */ + } +/* L1192: */ + } + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + zlaset_("U", &i__1, &i__2, &c_b1, &c_b1, &u[(u_dim1 << 1) + 1] + , ldu); + } +/* vectors overwrite [U](1:NR,1:NR) as conjugate transposed. These */ +/* will be pre-multiplied by Q to build the left singular vectors of A. */ + i__1 = *lcwork - *n; + zgesvd_("N", "O", n, &nr, &u[u_offset], ldu, &s[1], &u[u_offset], + ldu, &u[u_offset], ldu, &cwork[*n + 1], &i__1, &rwork[1], + info); + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = p + p * u_dim1; + d_cnjg(&z__1, &u[p + p * u_dim1]); + u[i__2].r = z__1.r, u[i__2].i = z__1.i; + i__2 = nr; + for (q = p + 1; q <= i__2; ++q) { + d_cnjg(&z__1, &u[q + p * u_dim1]); + ctmp.r = z__1.r, ctmp.i = z__1.i; + i__3 = q + p * u_dim1; + d_cnjg(&z__1, &u[p + q * u_dim1]); + u[i__3].r = z__1.r, u[i__3].i = z__1.i; + i__3 = p + q * u_dim1; + u[i__3].r = ctmp.r, u[i__3].i = ctmp.i; +/* L1120: */ + } +/* L1119: */ + } + + } else { + zlacpy_("U", &nr, n, &a[a_offset], lda, &u[u_offset], ldu); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + zlaset_("L", &i__1, &i__2, &c_b1, &c_b1, &u[u_dim1 + 2], ldu); + } +/* vectors overwrite [U](1:NR,1:NR) */ + i__1 = *lcwork - *n; + zgesvd_("O", "N", &nr, n, &u[u_offset], ldu, &s[1], &u[u_offset], + ldu, &v[v_offset], ldv, &cwork[*n + 1], &i__1, &rwork[1], + info); +/* R. These will be pre-multiplied by Q to build the left singular */ +/* vectors of A. */ + } + +/* (M x NR) or (M x N) or (M x M). */ + if (nr < *m && ! wntuf) { + i__1 = *m - nr; + zlaset_("A", &i__1, &nr, &c_b1, &c_b1, &u[nr + 1 + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + zlaset_("A", &nr, &i__1, &c_b1, &c_b1, &u[(nr + 1) * u_dim1 + + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + zlaset_("A", &i__1, &i__2, &c_b1, &c_b2, &u[nr + 1 + (nr + 1) + * u_dim1], ldu); + } + } + +/* The Q matrix from the first QRF is built into the left singular */ +/* vectors matrix U. */ + + if (! wntuf) { + i__1 = *lcwork - *n; + zunmqr_("L", "N", m, &n1, n, &a[a_offset], lda, &cwork[1], &u[ + u_offset], ldu, &cwork[*n + 1], &i__1, &ierr); + } + if (rowprm && ! wntuf) { + i__1 = *m - 1; + zlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[*n + 1], & + c_n1); + } + + } else if (rsvec && ! lsvec) { +/* ....................................................................... */ +/* ....................................................................... */ + if (rtrans) { + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p; q <= i__2; ++q) { + i__3 = q + p * v_dim1; + d_cnjg(&z__1, &a[p + q * a_dim1]); + v[i__3].r = z__1.r, v[i__3].i = z__1.i; +/* L1166: */ + } +/* L1165: */ + } + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + zlaset_("U", &i__1, &i__2, &c_b1, &c_b1, &v[(v_dim1 << 1) + 1] + , ldv); + } +/* vectors not computed */ + if (wntvr || nr == *n) { + i__1 = *lcwork - *n; + zgesvd_("O", "N", n, &nr, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &u[u_offset], ldu, &cwork[*n + 1], & + i__1, &rwork[1], info); + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = p + p * v_dim1; + d_cnjg(&z__1, &v[p + p * v_dim1]); + v[i__2].r = z__1.r, v[i__2].i = z__1.i; + i__2 = nr; + for (q = p + 1; q <= i__2; ++q) { + d_cnjg(&z__1, &v[q + p * v_dim1]); + ctmp.r = z__1.r, ctmp.i = z__1.i; + i__3 = q + p * v_dim1; + d_cnjg(&z__1, &v[p + q * v_dim1]); + v[i__3].r = z__1.r, v[i__3].i = z__1.i; + i__3 = p + q * v_dim1; + v[i__3].r = ctmp.r, v[i__3].i = ctmp.i; +/* L1122: */ + } +/* L1121: */ + } + + if (nr < *n) { + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = nr + 1; q <= i__2; ++q) { + i__3 = p + q * v_dim1; + d_cnjg(&z__1, &v[q + p * v_dim1]); + v[i__3].r = z__1.r, v[i__3].i = z__1.i; +/* L1104: */ + } +/* L1103: */ + } + } + zlapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]); + } else { +/* [!] This is simple implementation that augments [V](1:N,1:NR) */ +/* by padding a zero block. In the case NR << N, a more efficient */ +/* way is to first use the QR factorization. For more details */ +/* how to implement this, see the " FULL SVD " branch. */ + i__1 = *n - nr; + zlaset_("G", n, &i__1, &c_b1, &c_b1, &v[(nr + 1) * v_dim1 + 1] + , ldv); + i__1 = *lcwork - *n; + zgesvd_("O", "N", n, n, &v[v_offset], ldv, &s[1], &u[u_offset] + , ldu, &u[u_offset], ldu, &cwork[*n + 1], &i__1, & + rwork[1], info); + + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + i__2 = p + p * v_dim1; + d_cnjg(&z__1, &v[p + p * v_dim1]); + v[i__2].r = z__1.r, v[i__2].i = z__1.i; + i__2 = *n; + for (q = p + 1; q <= i__2; ++q) { + d_cnjg(&z__1, &v[q + p * v_dim1]); + ctmp.r = z__1.r, ctmp.i = z__1.i; + i__3 = q + p * v_dim1; + d_cnjg(&z__1, &v[p + q * v_dim1]); + v[i__3].r = z__1.r, v[i__3].i = z__1.i; + i__3 = p + q * v_dim1; + v[i__3].r = ctmp.r, v[i__3].i = ctmp.i; +/* L1124: */ + } +/* L1123: */ + } + zlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); + } + + } else { + zlacpy_("U", &nr, n, &a[a_offset], lda, &v[v_offset], ldv); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + zlaset_("L", &i__1, &i__2, &c_b1, &c_b1, &v[v_dim1 + 2], ldv); + } +/* vectors stored in U(1:NR,1:NR) */ + if (wntvr || nr == *n) { + i__1 = *lcwork - *n; + zgesvd_("N", "O", &nr, n, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, &cwork[*n + 1], & + i__1, &rwork[1], info); + zlapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]); + } else { +/* [!] This is simple implementation that augments [V](1:NR,1:N) */ +/* by padding a zero block. In the case NR << N, a more efficient */ +/* way is to first use the LQ factorization. For more details */ +/* how to implement this, see the " FULL SVD " branch. */ + i__1 = *n - nr; + zlaset_("G", &i__1, n, &c_b1, &c_b1, &v[nr + 1 + v_dim1], ldv); + i__1 = *lcwork - *n; + zgesvd_("N", "O", n, n, &v[v_offset], ldv, &s[1], &u[u_offset] + , ldu, &v[v_offset], ldv, &cwork[*n + 1], &i__1, & + rwork[1], info); + zlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); + } +/* vectors of A. */ + } + + } else { +/* ....................................................................... */ +/* ....................................................................... */ + if (rtrans) { + + + if (wntvr || nr == *n) { +/* vectors of R**H */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p; q <= i__2; ++q) { + i__3 = q + p * v_dim1; + d_cnjg(&z__1, &a[p + q * a_dim1]); + v[i__3].r = z__1.r, v[i__3].i = z__1.i; +/* L1169: */ + } +/* L1168: */ + } + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + zlaset_("U", &i__1, &i__2, &c_b1, &c_b1, &v[(v_dim1 << 1) + + 1], ldv); + } + +/* singular vectors of R**H stored in [U](1:NR,1:NR) as conjugate */ +/* transposed */ + i__1 = *lcwork - *n; + zgesvd_("O", "A", n, &nr, &v[v_offset], ldv, &s[1], &v[ + v_offset], ldv, &u[u_offset], ldu, &cwork[*n + 1], & + i__1, &rwork[1], info); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = p + p * v_dim1; + d_cnjg(&z__1, &v[p + p * v_dim1]); + v[i__2].r = z__1.r, v[i__2].i = z__1.i; + i__2 = nr; + for (q = p + 1; q <= i__2; ++q) { + d_cnjg(&z__1, &v[q + p * v_dim1]); + ctmp.r = z__1.r, ctmp.i = z__1.i; + i__3 = q + p * v_dim1; + d_cnjg(&z__1, &v[p + q * v_dim1]); + v[i__3].r = z__1.r, v[i__3].i = z__1.i; + i__3 = p + q * v_dim1; + v[i__3].r = ctmp.r, v[i__3].i = ctmp.i; +/* L1116: */ + } +/* L1115: */ + } + if (nr < *n) { + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = nr + 1; q <= i__2; ++q) { + i__3 = p + q * v_dim1; + d_cnjg(&z__1, &v[q + p * v_dim1]); + v[i__3].r = z__1.r, v[i__3].i = z__1.i; +/* L1102: */ + } +/* L1101: */ + } + } + zlapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]); + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = p + p * u_dim1; + d_cnjg(&z__1, &u[p + p * u_dim1]); + u[i__2].r = z__1.r, u[i__2].i = z__1.i; + i__2 = nr; + for (q = p + 1; q <= i__2; ++q) { + d_cnjg(&z__1, &u[q + p * u_dim1]); + ctmp.r = z__1.r, ctmp.i = z__1.i; + i__3 = q + p * u_dim1; + d_cnjg(&z__1, &u[p + q * u_dim1]); + u[i__3].r = z__1.r, u[i__3].i = z__1.i; + i__3 = p + q * u_dim1; + u[i__3].r = ctmp.r, u[i__3].i = ctmp.i; +/* L1118: */ + } +/* L1117: */ + } + + if (nr < *m && ! wntuf) { + i__1 = *m - nr; + zlaset_("A", &i__1, &nr, &c_b1, &c_b1, &u[nr + 1 + u_dim1] + , ldu); + if (nr < n1) { + i__1 = n1 - nr; + zlaset_("A", &nr, &i__1, &c_b1, &c_b1, &u[(nr + 1) * + u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + zlaset_("A", &i__1, &i__2, &c_b1, &c_b2, &u[nr + 1 + ( + nr + 1) * u_dim1], ldu); + } + } + + } else { +/* vectors of R**H */ +/* [[The optimal ratio N/NR for using QRF instead of padding */ +/* with zeros. Here hard coded to 2; it must be at least */ +/* two due to work space constraints.]] */ +/* OPTRATIO = ILAENV(6, 'ZGESVD', 'S' // 'O', NR,N,0,0) */ +/* OPTRATIO = MAX( OPTRATIO, 2 ) */ + optratio = 2; + if (optratio * nr > *n) { + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p; q <= i__2; ++q) { + i__3 = q + p * v_dim1; + d_cnjg(&z__1, &a[p + q * a_dim1]); + v[i__3].r = z__1.r, v[i__3].i = z__1.i; +/* L1199: */ + } +/* L1198: */ + } + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + zlaset_("U", &i__1, &i__2, &c_b1, &c_b1, &v[(v_dim1 << + 1) + 1], ldv); + } + + i__1 = *n - nr; + zlaset_("A", n, &i__1, &c_b1, &c_b1, &v[(nr + 1) * v_dim1 + + 1], ldv); + i__1 = *lcwork - *n; + zgesvd_("O", "A", n, n, &v[v_offset], ldv, &s[1], &v[ + v_offset], ldv, &u[u_offset], ldu, &cwork[*n + 1], + &i__1, &rwork[1], info); + + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + i__2 = p + p * v_dim1; + d_cnjg(&z__1, &v[p + p * v_dim1]); + v[i__2].r = z__1.r, v[i__2].i = z__1.i; + i__2 = *n; + for (q = p + 1; q <= i__2; ++q) { + d_cnjg(&z__1, &v[q + p * v_dim1]); + ctmp.r = z__1.r, ctmp.i = z__1.i; + i__3 = q + p * v_dim1; + d_cnjg(&z__1, &v[p + q * v_dim1]); + v[i__3].r = z__1.r, v[i__3].i = z__1.i; + i__3 = p + q * v_dim1; + v[i__3].r = ctmp.r, v[i__3].i = ctmp.i; +/* L1114: */ + } +/* L1113: */ + } + zlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); +/* (M x N1), i.e. (M x N) or (M x M). */ + + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + i__2 = p + p * u_dim1; + d_cnjg(&z__1, &u[p + p * u_dim1]); + u[i__2].r = z__1.r, u[i__2].i = z__1.i; + i__2 = *n; + for (q = p + 1; q <= i__2; ++q) { + d_cnjg(&z__1, &u[q + p * u_dim1]); + ctmp.r = z__1.r, ctmp.i = z__1.i; + i__3 = q + p * u_dim1; + d_cnjg(&z__1, &u[p + q * u_dim1]); + u[i__3].r = z__1.r, u[i__3].i = z__1.i; + i__3 = p + q * u_dim1; + u[i__3].r = ctmp.r, u[i__3].i = ctmp.i; +/* L1112: */ + } +/* L1111: */ + } + + if (*n < *m && ! wntuf) { + i__1 = *m - *n; + zlaset_("A", &i__1, n, &c_b1, &c_b1, &u[*n + 1 + + u_dim1], ldu); + if (*n < n1) { + i__1 = n1 - *n; + zlaset_("A", n, &i__1, &c_b1, &c_b1, &u[(*n + 1) * + u_dim1 + 1], ldu); + i__1 = *m - *n; + i__2 = n1 - *n; + zlaset_("A", &i__1, &i__2, &c_b1, &c_b2, &u[*n + + 1 + (*n + 1) * u_dim1], ldu); + } + } + } else { +/* singular vectors of R */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p; q <= i__2; ++q) { + i__3 = q + (nr + p) * u_dim1; + d_cnjg(&z__1, &a[p + q * a_dim1]); + u[i__3].r = z__1.r, u[i__3].i = z__1.i; +/* L1197: */ + } +/* L1196: */ + } + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + zlaset_("U", &i__1, &i__2, &c_b1, &c_b1, &u[(nr + 2) * + u_dim1 + 1], ldu); + } + i__1 = *lcwork - *n - nr; + zgeqrf_(n, &nr, &u[(nr + 1) * u_dim1 + 1], ldu, &cwork[*n + + 1], &cwork[*n + nr + 1], &i__1, &ierr); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = 1; q <= i__2; ++q) { + i__3 = q + p * v_dim1; + d_cnjg(&z__1, &u[p + (nr + q) * u_dim1]); + v[i__3].r = z__1.r, v[i__3].i = z__1.i; +/* L1144: */ + } +/* L1143: */ + } + i__1 = nr - 1; + i__2 = nr - 1; + zlaset_("U", &i__1, &i__2, &c_b1, &c_b1, &v[(v_dim1 << 1) + + 1], ldv); + i__1 = *lcwork - *n - nr; + zgesvd_("S", "O", &nr, &nr, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, &cwork[*n + nr + + 1], &i__1, &rwork[1], info); + i__1 = *n - nr; + zlaset_("A", &i__1, &nr, &c_b1, &c_b1, &v[nr + 1 + v_dim1] + , ldv); + i__1 = *n - nr; + zlaset_("A", &nr, &i__1, &c_b1, &c_b1, &v[(nr + 1) * + v_dim1 + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + zlaset_("A", &i__1, &i__2, &c_b1, &c_b2, &v[nr + 1 + (nr + + 1) * v_dim1], ldv); + i__1 = *lcwork - *n - nr; + zunmqr_("R", "C", n, n, &nr, &u[(nr + 1) * u_dim1 + 1], + ldu, &cwork[*n + 1], &v[v_offset], ldv, &cwork[*n + + nr + 1], &i__1, &ierr); + zlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); +/* (M x NR) or (M x N) or (M x M). */ + if (nr < *m && ! wntuf) { + i__1 = *m - nr; + zlaset_("A", &i__1, &nr, &c_b1, &c_b1, &u[nr + 1 + + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + zlaset_("A", &nr, &i__1, &c_b1, &c_b1, &u[(nr + 1) + * u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + zlaset_("A", &i__1, &i__2, &c_b1, &c_b2, &u[nr + + 1 + (nr + 1) * u_dim1], ldu); + } + } + } + } + + } else { + + + if (wntvr || nr == *n) { + zlacpy_("U", &nr, n, &a[a_offset], lda, &v[v_offset], ldv); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + zlaset_("L", &i__1, &i__2, &c_b1, &c_b1, &v[v_dim1 + 2], + ldv); + } +/* singular vectors of R stored in [U](1:NR,1:NR) */ + i__1 = *lcwork - *n; + zgesvd_("S", "O", &nr, n, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, &cwork[*n + 1], & + i__1, &rwork[1], info); + zlapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]); +/* (M x NR) or (M x N) or (M x M). */ + if (nr < *m && ! wntuf) { + i__1 = *m - nr; + zlaset_("A", &i__1, &nr, &c_b1, &c_b1, &u[nr + 1 + u_dim1] + , ldu); + if (nr < n1) { + i__1 = n1 - nr; + zlaset_("A", &nr, &i__1, &c_b1, &c_b1, &u[(nr + 1) * + u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + zlaset_("A", &i__1, &i__2, &c_b1, &c_b2, &u[nr + 1 + ( + nr + 1) * u_dim1], ldu); + } + } + + } else { +/* is then N1 (N or M) */ +/* [[The optimal ratio N/NR for using LQ instead of padding */ +/* with zeros. Here hard coded to 2; it must be at least */ +/* two due to work space constraints.]] */ +/* OPTRATIO = ILAENV(6, 'ZGESVD', 'S' // 'O', NR,N,0,0) */ +/* OPTRATIO = MAX( OPTRATIO, 2 ) */ + optratio = 2; + if (optratio * nr > *n) { + zlacpy_("U", &nr, n, &a[a_offset], lda, &v[v_offset], ldv); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + zlaset_("L", &i__1, &i__2, &c_b1, &c_b1, &v[v_dim1 + + 2], ldv); + } +/* singular vectors of R stored in [U](1:NR,1:NR) */ + i__1 = *n - nr; + zlaset_("A", &i__1, n, &c_b1, &c_b1, &v[nr + 1 + v_dim1], + ldv); + i__1 = *lcwork - *n; + zgesvd_("S", "O", n, n, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, &cwork[*n + 1], + &i__1, &rwork[1], info); + zlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); +/* singular vectors of A. The leading N left singular vectors */ +/* are in [U](1:N,1:N) */ +/* (M x N1), i.e. (M x N) or (M x M). */ + if (*n < *m && ! wntuf) { + i__1 = *m - *n; + zlaset_("A", &i__1, n, &c_b1, &c_b1, &u[*n + 1 + + u_dim1], ldu); + if (*n < n1) { + i__1 = n1 - *n; + zlaset_("A", n, &i__1, &c_b1, &c_b1, &u[(*n + 1) * + u_dim1 + 1], ldu); + i__1 = *m - *n; + i__2 = n1 - *n; + zlaset_("A", &i__1, &i__2, &c_b1, &c_b2, &u[*n + + 1 + (*n + 1) * u_dim1], ldu); + } + } + } else { + zlacpy_("U", &nr, n, &a[a_offset], lda, &u[nr + 1 + + u_dim1], ldu); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + zlaset_("L", &i__1, &i__2, &c_b1, &c_b1, &u[nr + 2 + + u_dim1], ldu); + } + i__1 = *lcwork - *n - nr; + zgelqf_(&nr, n, &u[nr + 1 + u_dim1], ldu, &cwork[*n + 1], + &cwork[*n + nr + 1], &i__1, &ierr); + zlacpy_("L", &nr, &nr, &u[nr + 1 + u_dim1], ldu, &v[ + v_offset], ldv); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + zlaset_("U", &i__1, &i__2, &c_b1, &c_b1, &v[(v_dim1 << + 1) + 1], ldv); + } + i__1 = *lcwork - *n - nr; + zgesvd_("S", "O", &nr, &nr, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, &cwork[*n + nr + + 1], &i__1, &rwork[1], info); + i__1 = *n - nr; + zlaset_("A", &i__1, &nr, &c_b1, &c_b1, &v[nr + 1 + v_dim1] + , ldv); + i__1 = *n - nr; + zlaset_("A", &nr, &i__1, &c_b1, &c_b1, &v[(nr + 1) * + v_dim1 + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + zlaset_("A", &i__1, &i__2, &c_b1, &c_b2, &v[nr + 1 + (nr + + 1) * v_dim1], ldv); + i__1 = *lcwork - *n - nr; + zunmlq_("R", "N", n, n, &nr, &u[nr + 1 + u_dim1], ldu, & + cwork[*n + 1], &v[v_offset], ldv, &cwork[*n + nr + + 1], &i__1, &ierr); + zlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); +/* (M x NR) or (M x N) or (M x M). */ + if (nr < *m && ! wntuf) { + i__1 = *m - nr; + zlaset_("A", &i__1, &nr, &c_b1, &c_b1, &u[nr + 1 + + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + zlaset_("A", &nr, &i__1, &c_b1, &c_b1, &u[(nr + 1) + * u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + zlaset_("A", &i__1, &i__2, &c_b1, &c_b2, &u[nr + + 1 + (nr + 1) * u_dim1], ldu); + } + } + } + } + } + +/* The Q matrix from the first QRF is built into the left singular */ +/* vectors matrix U. */ + + if (! wntuf) { + i__1 = *lcwork - *n; + zunmqr_("L", "N", m, &n1, n, &a[a_offset], lda, &cwork[1], &u[ + u_offset], ldu, &cwork[*n + 1], &i__1, &ierr); + } + if (rowprm && ! wntuf) { + i__1 = *m - 1; + zlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[*n + 1], & + c_n1); + } + +/* ... end of the "full SVD" branch */ + } + +/* Check whether some singular values are returned as zeros, e.g. */ +/* due to underflow, and update the numerical rank. */ + p = nr; + for (q = p; q >= 1; --q) { + if (s[q] > 0.) { + goto L4002; + } + --nr; +/* L4001: */ + } +L4002: + +/* singular values are set to zero. */ + if (nr < *n) { + i__1 = *n - nr; + dlaset_("G", &i__1, &c__1, &c_b74, &c_b74, &s[nr + 1], n); + } +/* values. */ + if (ascaled) { + d__1 = sqrt((doublereal) (*m)); + dlascl_("G", &c__0, &c__0, &c_b87, &d__1, &nr, &c__1, &s[1], n, &ierr); + } + if (conda) { + rwork[1] = sconda; + } + rwork[2] = (doublereal) (p - nr); +/* exact zeros in ZGESVD() applied to the (possibly truncated) */ +/* full row rank triangular (trapezoidal) factor of A. */ + *numrank = nr; + + return 0; + +/* End of ZGESVDQ */ + +} /* zgesvdq_ */ + diff --git a/lapack-netlib/SRC/zgesvdx.c b/lapack-netlib/SRC/zgesvdx.c new file mode 100644 index 000000000..3627bf3a8 --- /dev/null +++ b/lapack-netlib/SRC/zgesvdx.c @@ -0,0 +1,1388 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGESVDX computes the singular value decomposition (SVD) for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGESVDX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGESVDX( JOBU, JOBVT, RANGE, M, N, A, LDA, VL, VU, */ +/* $ IL, IU, NS, S, U, LDU, VT, LDVT, WORK, */ +/* $ LWORK, RWORK, IWORK, INFO ) */ + + +/* CHARACTER JOBU, JOBVT, RANGE */ +/* INTEGER IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS */ +/* DOUBLE PRECISION VL, VU */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION S( * ), RWORK( * ) */ +/* COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ), */ +/* $ WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGESVDX computes the singular value decomposition (SVD) of a complex */ +/* > M-by-N matrix A, optionally computing the left and/or right singular */ +/* > vectors. The SVD is written */ +/* > */ +/* > A = U * SIGMA * transpose(V) */ +/* > */ +/* > where SIGMA is an M-by-N matrix which is zero except for its */ +/* > f2cmin(m,n) diagonal elements, U is an M-by-M unitary matrix, and */ +/* > V is an N-by-N unitary matrix. The diagonal elements of SIGMA */ +/* > are the singular values of A; they are real and non-negative, and */ +/* > are returned in descending order. The first f2cmin(m,n) columns of */ +/* > U and V are the left and right singular vectors of A. */ +/* > */ +/* > ZGESVDX uses an eigenvalue problem for obtaining the SVD, which */ +/* > allows for the computation of a subset of singular values and */ +/* > vectors. See DBDSVDX for details. */ +/* > */ +/* > Note that the routine returns V**T, not V. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix U: */ +/* > = 'V': the first f2cmin(m,n) columns of U (the left singular */ +/* > vectors) or as specified by RANGE are returned in */ +/* > the array U; */ +/* > = 'N': no columns of U (no left singular vectors) are */ +/* > computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVT */ +/* > \verbatim */ +/* > JOBVT is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix */ +/* > V**T: */ +/* > = 'V': the first f2cmin(m,n) rows of V**T (the right singular */ +/* > vectors) or as specified by RANGE are returned in */ +/* > the array VT; */ +/* > = 'N': no rows of V**T (no right singular vectors) are */ +/* > computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all singular values will be found. */ +/* > = 'V': all singular values in the half-open interval (VL,VU] */ +/* > will be found. */ +/* > = 'I': the IL-th through IU-th singular values will be found. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the contents of A are destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is DOUBLE PRECISION */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for singular values. VU > VL. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is DOUBLE PRECISION */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for singular values. VU > VL. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > smallest singular value to be returned. */ +/* > 1 <= IL <= IU <= f2cmin(M,N), if f2cmin(M,N) > 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > largest singular value to be returned. */ +/* > 1 <= IL <= IU <= f2cmin(M,N), if f2cmin(M,N) > 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] NS */ +/* > \verbatim */ +/* > NS is INTEGER */ +/* > The total number of singular values found, */ +/* > 0 <= NS <= f2cmin(M,N). */ +/* > If RANGE = 'A', NS = f2cmin(M,N); if RANGE = 'I', NS = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */ +/* > The singular values of A, sorted so that S(i) >= S(i+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is COMPLEX*16 array, dimension (LDU,UCOL) */ +/* > If JOBU = 'V', U contains columns of U (the left singular */ +/* > vectors, stored columnwise) as specified by RANGE; if */ +/* > JOBU = 'N', U is not referenced. */ +/* > Note: The user must ensure that UCOL >= NS; if RANGE = 'V', */ +/* > the exact value of NS is not known in advance and an upper */ +/* > bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= 1; if */ +/* > JOBU = 'V', LDU >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VT */ +/* > \verbatim */ +/* > VT is COMPLEX*16 array, dimension (LDVT,N) */ +/* > If JOBVT = 'V', VT contains the rows of V**T (the right singular */ +/* > vectors, stored rowwise) as specified by RANGE; if JOBVT = 'N', */ +/* > VT is not referenced. */ +/* > Note: The user must ensure that LDVT >= NS; if RANGE = 'V', */ +/* > the exact value of NS is not known in advance and an upper */ +/* > bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVT */ +/* > \verbatim */ +/* > LDVT is INTEGER */ +/* > The leading dimension of the array VT. LDVT >= 1; if */ +/* > JOBVT = 'V', LDVT >= NS (see above). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= MAX(1,MIN(M,N)*(MIN(M,N)+4)) for the paths (see */ +/* > comments inside the code): */ +/* > - PATH 1 (M much larger than N) */ +/* > - PATH 1t (N much larger than M) */ +/* > LWORK >= MAX(1,MIN(M,N)*2+MAX(M,N)) for the other paths. */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) */ +/* > LRWORK >= MIN(M,N)*(MIN(M,N)*2+15*MIN(M,N)). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (12*MIN(M,N)) */ +/* > If INFO = 0, the first NS elements of IWORK are zero. If INFO > 0, */ +/* > then IWORK contains the indices of the eigenvectors that failed */ +/* > to converge in DBDSVDX/DSTEVX. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, then i eigenvectors failed to converge */ +/* > in DBDSVDX/DSTEVX. */ +/* > if INFO = N*2 + 1, an internal error occurred in */ +/* > DBDSVDX */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16GEsing */ + +/* ===================================================================== */ +/* Subroutine */ int zgesvdx_(char *jobu, char *jobvt, char *range, integer * + m, integer *n, doublecomplex *a, integer *lda, doublereal *vl, + doublereal *vu, integer *il, integer *iu, integer *ns, doublereal *s, + doublecomplex *u, integer *ldu, doublecomplex *vt, integer *ldvt, + doublecomplex *work, integer *lwork, doublereal *rwork, integer * + iwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], + i__2, i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1; + char ch__1[2]; + + /* Local variables */ + integer iscl; + logical alls, inds; + integer ilqf; + doublereal anrm; + integer ierr, iqrf, itau; + char jobz[1]; + logical vals; + integer i__, j, k; + extern logical lsame_(char *, char *); + integer iltgk, itemp, minmn, itaup, itauq, iutgk, itgkz, mnthr; + logical wantu; + integer id, ie; + extern doublereal dlamch_(char *); + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), zgebrd_(integer *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *, + integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + doublereal bignum, abstol; + extern /* Subroutine */ int zgelqf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ), zlascl_(char *, integer *, integer *, doublereal *, doublereal + *, integer *, integer *, doublecomplex *, integer *, integer *); + char rngtgk[1]; + extern /* Subroutine */ int zgeqrf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ); + integer itempr; + extern /* Subroutine */ int zlaset_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *); + integer minwrk, maxwrk; + doublereal smlnum; + extern /* Subroutine */ int zunmbr_(char *, char *, char *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ); + logical lquery, wantvt; + extern /* Subroutine */ int zunmlq_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + doublereal dum[1], eps; + extern /* Subroutine */ int dbdsvdx_(char *, char *, char *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1 * 1; + vt -= vt_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + *ns = 0; + *info = 0; + abstol = dlamch_("S") * 2; + lquery = *lwork == -1; + minmn = f2cmin(*m,*n); + wantu = lsame_(jobu, "V"); + wantvt = lsame_(jobvt, "V"); + if (wantu || wantvt) { + *(unsigned char *)jobz = 'V'; + } else { + *(unsigned char *)jobz = 'N'; + } + alls = lsame_(range, "A"); + vals = lsame_(range, "V"); + inds = lsame_(range, "I"); + + *info = 0; + if (! lsame_(jobu, "V") && ! lsame_(jobu, "N")) { + *info = -1; + } else if (! lsame_(jobvt, "V") && ! lsame_(jobvt, + "N")) { + *info = -2; + } else if (! (alls || vals || inds)) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*m > *lda) { + *info = -7; + } else if (minmn > 0) { + if (vals) { + if (*vl < 0.) { + *info = -8; + } else if (*vu <= *vl) { + *info = -9; + } + } else if (inds) { + if (*il < 1 || *il > f2cmax(1,minmn)) { + *info = -10; + } else if (*iu < f2cmin(minmn,*il) || *iu > minmn) { + *info = -11; + } + } + if (*info == 0) { + if (wantu && *ldu < *m) { + *info = -15; + } else if (wantvt) { + if (inds) { + if (*ldvt < *iu - *il + 1) { + *info = -17; + } + } else if (*ldvt < minmn) { + *info = -17; + } + } + } + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (minmn > 0) { + if (*m >= *n) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = jobu; + i__1[1] = 1, a__1[1] = jobvt; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + mnthr = ilaenv_(&c__6, "ZGESVD", ch__1, m, n, &c__0, &c__0, ( + ftnlen)6, (ftnlen)2); + if (*m >= mnthr) { + +/* Path 1 (M much larger than N) */ + + minwrk = *n * (*n + 5); + maxwrk = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * *n + (*n << 1) + (*n << 1) * + ilaenv_(&c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + if (wantu || wantvt) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * *n + (*n << 1) + *n * + ilaenv_(&c__1, "ZUNMQR", "LN", n, n, n, &c_n1, + (ftnlen)6, (ftnlen)2); + maxwrk = f2cmax(i__2,i__3); + } + } else { + +/* Path 2 (M at least N, but not much larger) */ + + minwrk = *n * 3 + *m; + maxwrk = (*n << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wantu || wantvt) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMQR", "LN", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)2); + maxwrk = f2cmax(i__2,i__3); + } + } + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = jobu; + i__1[1] = 1, a__1[1] = jobvt; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + mnthr = ilaenv_(&c__6, "ZGESVD", ch__1, m, n, &c__0, &c__0, ( + ftnlen)6, (ftnlen)2); + if (*n >= mnthr) { + +/* Path 1t (N much larger than M) */ + + minwrk = *m * (*m + 5); + maxwrk = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * *m + (*m << 1) + (*m << 1) * + ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + if (wantu || wantvt) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * *m + (*m << 1) + *m * + ilaenv_(&c__1, "ZUNMQR", "LN", m, m, m, &c_n1, + (ftnlen)6, (ftnlen)2); + maxwrk = f2cmax(i__2,i__3); + } + } else { + +/* Path 2t (N greater than M, but not much larger) */ + + + minwrk = *m * 3 + *n; + maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wantu || wantvt) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMQR", "LN", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)2); + maxwrk = f2cmax(i__2,i__3); + } + } + } + } + maxwrk = f2cmax(maxwrk,minwrk); + d__1 = (doublereal) maxwrk; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + + if (*lwork < minwrk && ! lquery) { + *info = -19; + } + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("ZGESVDX", &i__2, (ftnlen)7); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Set singular values indices accord to RANGE='A'. */ + + if (alls) { + *(unsigned char *)rngtgk = 'I'; + iltgk = 1; + iutgk = f2cmin(*m,*n); + } else if (inds) { + *(unsigned char *)rngtgk = 'I'; + iltgk = *il; + iutgk = *iu; + } else { + *(unsigned char *)rngtgk = 'V'; + iltgk = 0; + iutgk = 0; + } + +/* Get machine constants */ + + eps = dlamch_("P"); + smlnum = sqrt(dlamch_("S")) / eps; + bignum = 1. / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = zlange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0. && anrm < smlnum) { + iscl = 1; + zlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + } else if (anrm > bignum) { + iscl = 1; + zlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + } + + if (*m >= *n) { + +/* A has at least as many rows as columns. If A has sufficiently */ +/* more rows than columns, first reduce A using the QR */ +/* decomposition. */ + + if (*m >= mnthr) { + +/* Path 1 (M much larger than N): */ +/* A = Q * R = Q * ( QB * B * PB**T ) */ +/* = Q * ( QB * ( UB * S * VB**T ) * PB**T ) */ +/* U = Q * QB * UB; V**T = VB**T * PB**T */ + +/* Compute A=Q*R */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + itau = 1; + itemp = itau + *n; + i__2 = *lwork - itemp + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[itemp], &i__2, + info); + +/* Copy R into WORK and bidiagonalize it: */ +/* (Workspace: need N*N+3*N, prefer N*N+N+2*N*NB) */ + + iqrf = itemp; + itauq = itemp + *n * *n; + itaup = itauq + *n; + itemp = itaup + *n; + id = 1; + ie = id + *n; + itgkz = ie + *n; + zlacpy_("U", n, n, &a[a_offset], lda, &work[iqrf], n); + i__2 = *n - 1; + i__3 = *n - 1; + zlaset_("L", &i__2, &i__3, &c_b1, &c_b1, &work[iqrf + 1], n); + i__2 = *lwork - itemp + 1; + zgebrd_(n, n, &work[iqrf], n, &rwork[id], &rwork[ie], &work[itauq] + , &work[itaup], &work[itemp], &i__2, info); + itempr = itgkz + *n * ((*n << 1) + 1); + +/* Solve eigenvalue problem TGK*Z=Z*S. */ +/* (Workspace: need 2*N*N+14*N) */ + + i__2 = *n << 1; + dbdsvdx_("U", jobz, rngtgk, n, &rwork[id], &rwork[ie], vl, vu, & + iltgk, &iutgk, ns, &s[1], &rwork[itgkz], &i__2, &rwork[ + itempr], &iwork[1], info) + ; + +/* If needed, compute left singular vectors. */ + + if (wantu) { + k = itgkz; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + i__4 = j + i__ * u_dim1; + i__5 = k; + z__1.r = rwork[i__5], z__1.i = 0.; + u[i__4].r = z__1.r, u[i__4].i = z__1.i; + ++k; + } + k += *n; + } + i__2 = *m - *n; + zlaset_("A", &i__2, ns, &c_b1, &c_b1, &u[*n + 1 + u_dim1], + ldu); + +/* Call ZUNMBR to compute QB*UB. */ +/* (Workspace in WORK( ITEMP ): need N, prefer N*NB) */ + + i__2 = *lwork - itemp + 1; + zunmbr_("Q", "L", "N", n, ns, n, &work[iqrf], n, &work[itauq], + &u[u_offset], ldu, &work[itemp], &i__2, info); + +/* Call ZUNMQR to compute Q*(QB*UB). */ +/* (Workspace in WORK( ITEMP ): need N, prefer N*NB) */ + + i__2 = *lwork - itemp + 1; + zunmqr_("L", "N", m, ns, n, &a[a_offset], lda, &work[itau], & + u[u_offset], ldu, &work[itemp], &i__2, info); + } + +/* If needed, compute right singular vectors. */ + + if (wantvt) { + k = itgkz + *n; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + i__4 = i__ + j * vt_dim1; + i__5 = k; + z__1.r = rwork[i__5], z__1.i = 0.; + vt[i__4].r = z__1.r, vt[i__4].i = z__1.i; + ++k; + } + k += *n; + } + +/* Call ZUNMBR to compute VB**T * PB**T */ +/* (Workspace in WORK( ITEMP ): need N, prefer N*NB) */ + + i__2 = *lwork - itemp + 1; + zunmbr_("P", "R", "C", ns, n, n, &work[iqrf], n, &work[itaup], + &vt[vt_offset], ldvt, &work[itemp], &i__2, info); + } + } else { + +/* Path 2 (M at least N, but not much larger) */ +/* Reduce A to bidiagonal form without QR decomposition */ +/* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T */ +/* U = QB * UB; V**T = VB**T * PB**T */ + +/* Bidiagonalize A */ +/* (Workspace: need 2*N+M, prefer 2*N+(M+N)*NB) */ + + itauq = 1; + itaup = itauq + *n; + itemp = itaup + *n; + id = 1; + ie = id + *n; + itgkz = ie + *n; + i__2 = *lwork - itemp + 1; + zgebrd_(m, n, &a[a_offset], lda, &rwork[id], &rwork[ie], &work[ + itauq], &work[itaup], &work[itemp], &i__2, info); + itempr = itgkz + *n * ((*n << 1) + 1); + +/* Solve eigenvalue problem TGK*Z=Z*S. */ +/* (Workspace: need 2*N*N+14*N) */ + + i__2 = *n << 1; + dbdsvdx_("U", jobz, rngtgk, n, &rwork[id], &rwork[ie], vl, vu, & + iltgk, &iutgk, ns, &s[1], &rwork[itgkz], &i__2, &rwork[ + itempr], &iwork[1], info) + ; + +/* If needed, compute left singular vectors. */ + + if (wantu) { + k = itgkz; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + i__4 = j + i__ * u_dim1; + i__5 = k; + z__1.r = rwork[i__5], z__1.i = 0.; + u[i__4].r = z__1.r, u[i__4].i = z__1.i; + ++k; + } + k += *n; + } + i__2 = *m - *n; + zlaset_("A", &i__2, ns, &c_b1, &c_b1, &u[*n + 1 + u_dim1], + ldu); + +/* Call ZUNMBR to compute QB*UB. */ +/* (Workspace in WORK( ITEMP ): need N, prefer N*NB) */ + + i__2 = *lwork - itemp + 1; + zunmbr_("Q", "L", "N", m, ns, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[itemp], &i__2, &ierr); + } + +/* If needed, compute right singular vectors. */ + + if (wantvt) { + k = itgkz + *n; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + i__4 = i__ + j * vt_dim1; + i__5 = k; + z__1.r = rwork[i__5], z__1.i = 0.; + vt[i__4].r = z__1.r, vt[i__4].i = z__1.i; + ++k; + } + k += *n; + } + +/* Call ZUNMBR to compute VB**T * PB**T */ +/* (Workspace in WORK( ITEMP ): need N, prefer N*NB) */ + + i__2 = *lwork - itemp + 1; + zunmbr_("P", "R", "C", ns, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[itemp], &i__2, & + ierr); + } + } + } else { + +/* A has more columns than rows. If A has sufficiently more */ +/* columns than rows, first reduce A using the LQ decomposition. */ + + if (*n >= mnthr) { + +/* Path 1t (N much larger than M): */ +/* A = L * Q = ( QB * B * PB**T ) * Q */ +/* = ( QB * ( UB * S * VB**T ) * PB**T ) * Q */ +/* U = QB * UB ; V**T = VB**T * PB**T * Q */ + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + itau = 1; + itemp = itau + *m; + i__2 = *lwork - itemp + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[itemp], &i__2, + info); +/* Copy L into WORK and bidiagonalize it: */ +/* (Workspace in WORK( ITEMP ): need M*M+3*M, prefer M*M+M+2*M*NB) */ + + ilqf = itemp; + itauq = ilqf + *m * *m; + itaup = itauq + *m; + itemp = itaup + *m; + id = 1; + ie = id + *m; + itgkz = ie + *m; + zlacpy_("L", m, m, &a[a_offset], lda, &work[ilqf], m); + i__2 = *m - 1; + i__3 = *m - 1; + zlaset_("U", &i__2, &i__3, &c_b1, &c_b1, &work[ilqf + *m], m); + i__2 = *lwork - itemp + 1; + zgebrd_(m, m, &work[ilqf], m, &rwork[id], &rwork[ie], &work[itauq] + , &work[itaup], &work[itemp], &i__2, info); + itempr = itgkz + *m * ((*m << 1) + 1); + +/* Solve eigenvalue problem TGK*Z=Z*S. */ +/* (Workspace: need 2*M*M+14*M) */ + + i__2 = *m << 1; + dbdsvdx_("U", jobz, rngtgk, m, &rwork[id], &rwork[ie], vl, vu, & + iltgk, &iutgk, ns, &s[1], &rwork[itgkz], &i__2, &rwork[ + itempr], &iwork[1], info) + ; + +/* If needed, compute left singular vectors. */ + + if (wantu) { + k = itgkz; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = *m; + for (j = 1; j <= i__3; ++j) { + i__4 = j + i__ * u_dim1; + i__5 = k; + z__1.r = rwork[i__5], z__1.i = 0.; + u[i__4].r = z__1.r, u[i__4].i = z__1.i; + ++k; + } + k += *m; + } + +/* Call ZUNMBR to compute QB*UB. */ +/* (Workspace in WORK( ITEMP ): need M, prefer M*NB) */ + + i__2 = *lwork - itemp + 1; + zunmbr_("Q", "L", "N", m, ns, m, &work[ilqf], m, &work[itauq], + &u[u_offset], ldu, &work[itemp], &i__2, info); + } + +/* If needed, compute right singular vectors. */ + + if (wantvt) { + k = itgkz + *m; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = *m; + for (j = 1; j <= i__3; ++j) { + i__4 = i__ + j * vt_dim1; + i__5 = k; + z__1.r = rwork[i__5], z__1.i = 0.; + vt[i__4].r = z__1.r, vt[i__4].i = z__1.i; + ++k; + } + k += *m; + } + i__2 = *n - *m; + zlaset_("A", ns, &i__2, &c_b1, &c_b1, &vt[(*m + 1) * vt_dim1 + + 1], ldvt); + +/* Call ZUNMBR to compute (VB**T)*(PB**T) */ +/* (Workspace in WORK( ITEMP ): need M, prefer M*NB) */ + + i__2 = *lwork - itemp + 1; + zunmbr_("P", "R", "C", ns, m, m, &work[ilqf], m, &work[itaup], + &vt[vt_offset], ldvt, &work[itemp], &i__2, info); + +/* Call ZUNMLQ to compute ((VB**T)*(PB**T))*Q. */ +/* (Workspace in WORK( ITEMP ): need M, prefer M*NB) */ + + i__2 = *lwork - itemp + 1; + zunmlq_("R", "N", ns, n, m, &a[a_offset], lda, &work[itau], & + vt[vt_offset], ldvt, &work[itemp], &i__2, info); + } + } else { + +/* Path 2t (N greater than M, but not much larger) */ +/* Reduce to bidiagonal form without LQ decomposition */ +/* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T */ +/* U = QB * UB; V**T = VB**T * PB**T */ + +/* Bidiagonalize A */ +/* (Workspace: need 2*M+N, prefer 2*M+(M+N)*NB) */ + + itauq = 1; + itaup = itauq + *m; + itemp = itaup + *m; + id = 1; + ie = id + *m; + itgkz = ie + *m; + i__2 = *lwork - itemp + 1; + zgebrd_(m, n, &a[a_offset], lda, &rwork[id], &rwork[ie], &work[ + itauq], &work[itaup], &work[itemp], &i__2, info); + itempr = itgkz + *m * ((*m << 1) + 1); + +/* Solve eigenvalue problem TGK*Z=Z*S. */ +/* (Workspace: need 2*M*M+14*M) */ + + i__2 = *m << 1; + dbdsvdx_("L", jobz, rngtgk, m, &rwork[id], &rwork[ie], vl, vu, & + iltgk, &iutgk, ns, &s[1], &rwork[itgkz], &i__2, &rwork[ + itempr], &iwork[1], info) + ; + +/* If needed, compute left singular vectors. */ + + if (wantu) { + k = itgkz; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = *m; + for (j = 1; j <= i__3; ++j) { + i__4 = j + i__ * u_dim1; + i__5 = k; + z__1.r = rwork[i__5], z__1.i = 0.; + u[i__4].r = z__1.r, u[i__4].i = z__1.i; + ++k; + } + k += *m; + } + +/* Call ZUNMBR to compute QB*UB. */ +/* (Workspace in WORK( ITEMP ): need M, prefer M*NB) */ + + i__2 = *lwork - itemp + 1; + zunmbr_("Q", "L", "N", m, ns, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[itemp], &i__2, info); + } + +/* If needed, compute right singular vectors. */ + + if (wantvt) { + k = itgkz + *m; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = *m; + for (j = 1; j <= i__3; ++j) { + i__4 = i__ + j * vt_dim1; + i__5 = k; + z__1.r = rwork[i__5], z__1.i = 0.; + vt[i__4].r = z__1.r, vt[i__4].i = z__1.i; + ++k; + } + k += *m; + } + i__2 = *n - *m; + zlaset_("A", ns, &i__2, &c_b1, &c_b1, &vt[(*m + 1) * vt_dim1 + + 1], ldvt); + +/* Call ZUNMBR to compute VB**T * PB**T */ +/* (Workspace in WORK( ITEMP ): need M, prefer M*NB) */ + + i__2 = *lwork - itemp + 1; + zunmbr_("P", "R", "C", ns, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[itemp], &i__2, + info); + } + } + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + if (anrm < smlnum) { + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + } + +/* Return optimal workspace in WORK(1) */ + + d__1 = (doublereal) maxwrk; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + + return 0; + +/* End of ZGESVDX */ + +} /* zgesvdx_ */ + diff --git a/lapack-netlib/SRC/zgesvj.c b/lapack-netlib/SRC/zgesvj.c new file mode 100644 index 000000000..f62fdbcae --- /dev/null +++ b/lapack-netlib/SRC/zgesvj.c @@ -0,0 +1,2153 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGESVJ */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGESVJ + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, */ +/* LDV, CWORK, LWORK, RWORK, LRWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDV, LWORK, LRWORK, M, MV, N */ +/* CHARACTER*1 JOBA, JOBU, JOBV */ +/* COMPLEX*16 A( LDA, * ), V( LDV, * ), CWORK( LWORK ) */ +/* DOUBLE PRECISION RWORK( LRWORK ), SVA( N ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGESVJ computes the singular value decomposition (SVD) of a complex */ +/* > M-by-N matrix A, where M >= N. The SVD of A is written as */ +/* > [++] [xx] [x0] [xx] */ +/* > A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] */ +/* > [++] [xx] */ +/* > where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal */ +/* > matrix, and V is an N-by-N unitary matrix. The diagonal elements */ +/* > of SIGMA are the singular values of A. The columns of U and V are the */ +/* > left and the right singular vectors of A, respectively. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBA */ +/* > \verbatim */ +/* > JOBA is CHARACTER*1 */ +/* > Specifies the structure of A. */ +/* > = 'L': The input matrix A is lower triangular; */ +/* > = 'U': The input matrix A is upper triangular; */ +/* > = 'G': The input matrix A is general M-by-N matrix, M >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > Specifies whether to compute the left singular vectors */ +/* > (columns of U): */ +/* > = 'U' or 'F': The left singular vectors corresponding to the nonzero */ +/* > singular values are computed and returned in the leading */ +/* > columns of A. See more details in the description of A. */ +/* > The default numerical orthogonality threshold is set to */ +/* > approximately TOL=CTOL*EPS, CTOL=SQRT(M), EPS=DLAMCH('E'). */ +/* > = 'C': Analogous to JOBU='U', except that user can control the */ +/* > level of numerical orthogonality of the computed left */ +/* > singular vectors. TOL can be set to TOL = CTOL*EPS, where */ +/* > CTOL is given on input in the array WORK. */ +/* > No CTOL smaller than ONE is allowed. CTOL greater */ +/* > than 1 / EPS is meaningless. The option 'C' */ +/* > can be used if M*EPS is satisfactory orthogonality */ +/* > of the computed left singular vectors, so CTOL=M could */ +/* > save few sweeps of Jacobi rotations. */ +/* > See the descriptions of A and WORK(1). */ +/* > = 'N': The matrix U is not computed. However, see the */ +/* > description of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > Specifies whether to compute the right singular vectors, that */ +/* > is, the matrix V: */ +/* > = 'V' or 'J': the matrix V is computed and returned in the array V */ +/* > = 'A': the Jacobi rotations are applied to the MV-by-N */ +/* > array V. In other words, the right singular vector */ +/* > matrix V is not computed explicitly; instead it is */ +/* > applied to an MV-by-N matrix initially stored in the */ +/* > first MV rows of V. */ +/* > = 'N': the matrix V is not computed and the array V is not */ +/* > referenced */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. 1/DLAMCH('E') > M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. */ +/* > M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > If JOBU = 'U' .OR. JOBU = 'C': */ +/* > If INFO = 0 : */ +/* > RANKA orthonormal columns of U are returned in the */ +/* > leading RANKA columns of the array A. Here RANKA <= N */ +/* > is the number of computed singular values of A that are */ +/* > above the underflow threshold DLAMCH('S'). The singular */ +/* > vectors corresponding to underflowed or zero singular */ +/* > values are not computed. The value of RANKA is returned */ +/* > in the array RWORK as RANKA=NINT(RWORK(2)). Also see the */ +/* > descriptions of SVA and RWORK. The computed columns of U */ +/* > are mutually numerically orthogonal up to approximately */ +/* > TOL=SQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU = 'C'), */ +/* > see the description of JOBU. */ +/* > If INFO > 0, */ +/* > the procedure ZGESVJ did not converge in the given number */ +/* > of iterations (sweeps). In that case, the computed */ +/* > columns of U may not be orthogonal up to TOL. The output */ +/* > U (stored in A), SIGMA (given by the computed singular */ +/* > values in SVA(1:N)) and V is still a decomposition of the */ +/* > input matrix A in the sense that the residual */ +/* > || A - SCALE * U * SIGMA * V^* ||_2 / ||A||_2 is small. */ +/* > If JOBU = 'N': */ +/* > If INFO = 0 : */ +/* > Note that the left singular vectors are 'for free' in the */ +/* > one-sided Jacobi SVD algorithm. However, if only the */ +/* > singular values are needed, the level of numerical */ +/* > orthogonality of U is not an issue and iterations are */ +/* > stopped when the columns of the iterated matrix are */ +/* > numerically orthogonal up to approximately M*EPS. Thus, */ +/* > on exit, A contains the columns of U scaled with the */ +/* > corresponding singular values. */ +/* > If INFO > 0: */ +/* > the procedure ZGESVJ did not converge in the given number */ +/* > of iterations (sweeps). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SVA */ +/* > \verbatim */ +/* > SVA is DOUBLE PRECISION array, dimension (N) */ +/* > On exit, */ +/* > If INFO = 0 : */ +/* > depending on the value SCALE = RWORK(1), we have: */ +/* > If SCALE = ONE: */ +/* > SVA(1:N) contains the computed singular values of A. */ +/* > During the computation SVA contains the Euclidean column */ +/* > norms of the iterated matrices in the array A. */ +/* > If SCALE .NE. ONE: */ +/* > The singular values of A are SCALE*SVA(1:N), and this */ +/* > factored representation is due to the fact that some of the */ +/* > singular values of A might underflow or overflow. */ +/* > */ +/* > If INFO > 0: */ +/* > the procedure ZGESVJ did not converge in the given number of */ +/* > iterations (sweeps) and SCALE*SVA(1:N) may not be accurate. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MV */ +/* > \verbatim */ +/* > MV is INTEGER */ +/* > If JOBV = 'A', then the product of Jacobi rotations in ZGESVJ */ +/* > is applied to the first MV rows of V. See the description of JOBV. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] V */ +/* > \verbatim */ +/* > V is COMPLEX*16 array, dimension (LDV,N) */ +/* > If JOBV = 'V', then V contains on exit the N-by-N matrix of */ +/* > the right singular vectors; */ +/* > If JOBV = 'A', then V contains the product of the computed right */ +/* > singular vector matrix and the initial matrix in */ +/* > the array V. */ +/* > If JOBV = 'N', then V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V, LDV >= 1. */ +/* > If JOBV = 'V', then LDV >= f2cmax(1,N). */ +/* > If JOBV = 'A', then LDV >= f2cmax(1,MV) . */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] CWORK */ +/* > \verbatim */ +/* > CWORK is COMPLEX*16 array, dimension (f2cmax(1,LWORK)) */ +/* > Used as workspace. */ +/* > If on entry LWORK = -1, then a workspace query is assumed and */ +/* > no computation is done; CWORK(1) is set to the minial (and optimal) */ +/* > length of CWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER. */ +/* > Length of CWORK, LWORK >= M+N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (f2cmax(6,LRWORK)) */ +/* > On entry, */ +/* > If JOBU = 'C' : */ +/* > RWORK(1) = CTOL, where CTOL defines the threshold for convergence. */ +/* > The process stops if all columns of A are mutually */ +/* > orthogonal up to CTOL*EPS, EPS=DLAMCH('E'). */ +/* > It is required that CTOL >= ONE, i.e. it is not */ +/* > allowed to force the routine to obtain orthogonality */ +/* > below EPSILON. */ +/* > On exit, */ +/* > RWORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N) */ +/* > are the computed singular values of A. */ +/* > (See description of SVA().) */ +/* > RWORK(2) = NINT(RWORK(2)) is the number of the computed nonzero */ +/* > singular values. */ +/* > RWORK(3) = NINT(RWORK(3)) is the number of the computed singular */ +/* > values that are larger than the underflow threshold. */ +/* > RWORK(4) = NINT(RWORK(4)) is the number of sweeps of Jacobi */ +/* > rotations needed for numerical convergence. */ +/* > RWORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep. */ +/* > This is useful information in cases when ZGESVJ did */ +/* > not converge, as it can be used to estimate whether */ +/* > the output is still useful and for post festum analysis. */ +/* > RWORK(6) = the largest absolute value over all sines of the */ +/* > Jacobi rotation angles in the last sweep. It can be */ +/* > useful for a post festum analysis. */ +/* > If on entry LRWORK = -1, then a workspace query is assumed and */ +/* > no computation is done; RWORK(1) is set to the minial (and optimal) */ +/* > length of RWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > Length of RWORK, LRWORK >= MAX(6,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, then the i-th argument had an illegal value */ +/* > > 0: ZGESVJ did not converge in the maximal allowed number */ +/* > (NSWEEP=30) of sweeps. The output may still be useful. */ +/* > See the description of RWORK. */ +/* > \endverbatim */ +/* > */ +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16GEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane */ +/* > rotations. In the case of underflow of the tangent of the Jacobi angle, a */ +/* > modified Jacobi transformation of Drmac [3] is used. Pivot strategy uses */ +/* > column interchanges of de Rijk [1]. The relative accuracy of the computed */ +/* > singular values and the accuracy of the computed singular vectors (in */ +/* > angle metric) is as guaranteed by the theory of Demmel and Veselic [2]. */ +/* > The condition number that determines the accuracy in the full rank case */ +/* > is essentially min_{D=diag} kappa(A*D), where kappa(.) is the */ +/* > spectral condition number. The best performance of this Jacobi SVD */ +/* > procedure is achieved if used in an accelerated version of Drmac and */ +/* > Veselic [4,5], and it is the kernel routine in the SIGMA library [6]. */ +/* > Some tunning parameters (marked with [TP]) are available for the */ +/* > implementer. */ +/* > The computational range for the nonzero singular values is the machine */ +/* > number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even */ +/* > denormalized singular values can be computed with the corresponding */ +/* > gradual loss of accurate digits. */ +/* > \endverbatim */ + +/* > \par Contributor: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ============ */ +/* > */ +/* > Zlatko Drmac (Zagreb, Croatia) */ +/* > */ +/* > \endverbatim */ + +/* > \par References: */ +/* ================ */ +/* > */ +/* > \verbatim */ +/* > */ +/* > [1] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the */ +/* > singular value decomposition on a vector computer. */ +/* > SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371. */ +/* > [2] J. Demmel and K. Veselic: Jacobi method is more accurate than QR. */ +/* > [3] Z. Drmac: Implementation of Jacobi rotations for accurate singular */ +/* > value computation in floating point arithmetic. */ +/* > SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222. */ +/* > [4] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. */ +/* > SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. */ +/* > LAPACK Working note 169. */ +/* > [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. */ +/* > SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. */ +/* > LAPACK Working note 170. */ +/* > [6] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, */ +/* > QSVD, (H,K)-SVD computations. */ +/* > Department of Mathematics, University of Zagreb, 2008, 2015. */ +/* > \endverbatim */ + +/* > \par Bugs, examples and comments: */ +/* ================================= */ +/* > */ +/* > \verbatim */ +/* > =========================== */ +/* > Please report all bugs and send interesting test examples and comments to */ +/* > drmac@math.hr. Thank you. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zgesvj_(char *joba, char *jobu, char *jobv, integer *m, + integer *n, doublecomplex *a, integer *lda, doublereal *sva, integer * + mv, doublecomplex *v, integer *ldv, doublecomplex *cwork, integer * + lwork, doublereal *rwork, integer *lrwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + doublereal d__1, d__2; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + doublereal aapp; + doublecomplex aapq; + doublereal aaqq, ctol; + integer ierr; + doublereal bigtheta; + doublecomplex ompq; + integer pskipped; + extern /* Subroutine */ int zrot_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *); + doublereal aapp0, aapq1, temp1; + integer i__, p, q; + doublereal t, apoaq, aqoap; + extern logical lsame_(char *, char *); + doublereal theta, small, sfmin; + logical lsvec; + doublereal epsln; + logical applv, rsvec, uctol; + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + logical lower, upper, rotok; + integer n2, n4; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zswap_(integer *, doublecomplex *, + integer *, doublecomplex *, integer *), zaxpy_(integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *); + doublereal rootsfmin; + extern doublereal dznrm2_(integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zgsvj0_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublereal *, + integer *, doublecomplex *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublecomplex *, integer *, integer *), zgsvj1_(char *, integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublereal *, + integer *, doublecomplex *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublecomplex *, integer *, integer *); + integer n34; + doublereal cs; + extern doublereal dlamch_(char *); + doublereal sn; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer ijblsk, swband; + extern /* Subroutine */ int zdscal_(integer *, doublereal *, + doublecomplex *, integer *); + integer blskip; + doublereal mxaapq; + extern /* Subroutine */ int zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex *, + integer *, integer *); + doublereal thsign; + extern /* Subroutine */ int zlaset_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *); + doublereal mxsinj; + integer ir1; + extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, + doublereal *, doublereal *); + integer emptsw; + logical lquery; + integer notrot, iswrot, jbc; + doublereal big; + integer kbl, lkahead, igl, ibr, jgl, nbl; + doublereal skl; + logical goscale; + doublereal tol; + integer mvl; + logical noscale; + doublereal rootbig, rooteps; + integer rowskip; + doublereal roottol; + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + +/* from BLAS */ +/* from LAPACK */ +/* from BLAS */ +/* from LAPACK */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + --sva; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + --cwork; + --rwork; + + /* Function Body */ + lsvec = lsame_(jobu, "U") || lsame_(jobu, "F"); + uctol = lsame_(jobu, "C"); + rsvec = lsame_(jobv, "V") || lsame_(jobv, "J"); + applv = lsame_(jobv, "A"); + upper = lsame_(joba, "U"); + lower = lsame_(joba, "L"); + + lquery = *lwork == -1 || *lrwork == -1; + if (! (upper || lower || lsame_(joba, "G"))) { + *info = -1; + } else if (! (lsvec || uctol || lsame_(jobu, "N"))) + { + *info = -2; + } else if (! (rsvec || applv || lsame_(jobv, "N"))) + { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0 || *n > *m) { + *info = -5; + } else if (*lda < *m) { + *info = -7; + } else if (*mv < 0) { + *info = -9; + } else if (rsvec && *ldv < *n || applv && *ldv < *mv) { + *info = -11; + } else if (uctol && rwork[1] <= 1.) { + *info = -12; + } else if (*lwork < *m + *n && ! lquery) { + *info = -13; + } else if (*lrwork < f2cmax(*n,6) && ! lquery) { + *info = -15; + } else { + *info = 0; + } + +/* #:( */ + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGESVJ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + i__1 = *m + *n; + cwork[1].r = (doublereal) i__1, cwork[1].i = 0.; + rwork[1] = (doublereal) f2cmax(*n,6); + return 0; + } + +/* #:) Quick return for void matrix */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Set numerical parameters */ +/* The stopping criterion for Jacobi rotations is */ + +/* max_{i<>j}|A(:,i)^* * A(:,j)| / (||A(:,i)||*||A(:,j)||) < CTOL*EPS */ + +/* where EPS is the round-off and CTOL is defined as follows: */ + + if (uctol) { +/* ... user controlled */ + ctol = rwork[1]; + } else { +/* ... default */ + if (lsvec || rsvec || applv) { + ctol = sqrt((doublereal) (*m)); + } else { + ctol = (doublereal) (*m); + } + } +/* ... and the machine dependent parameters are */ +/* [!] (Make sure that SLAMCH() works properly on the target machine.) */ + + epsln = dlamch_("Epsilon"); + rooteps = sqrt(epsln); + sfmin = dlamch_("SafeMinimum"); + rootsfmin = sqrt(sfmin); + small = sfmin / epsln; + big = dlamch_("Overflow"); +/* BIG = ONE / SFMIN */ + rootbig = 1. / rootsfmin; +/* LARGE = BIG / SQRT( DBLE( M*N ) ) */ + bigtheta = 1. / rooteps; + + tol = ctol * epsln; + roottol = sqrt(tol); + + if ((doublereal) (*m) * epsln >= 1.) { + *info = -4; + i__1 = -(*info); + xerbla_("ZGESVJ", &i__1, (ftnlen)6); + return 0; + } + +/* Initialize the right singular vector matrix. */ + + if (rsvec) { + mvl = *n; + zlaset_("A", &mvl, n, &c_b1, &c_b2, &v[v_offset], ldv); + } else if (applv) { + mvl = *mv; + } + rsvec = rsvec || applv; + +/* Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N ) */ +/* (!) If necessary, scale A to protect the largest singular value */ +/* from overflow. It is possible that saving the largest singular */ +/* value destroys the information about the small ones. */ +/* This initial scaling is almost minimal in the sense that the */ +/* goal is to make sure that no column norm overflows, and that */ +/* SQRT(N)*max_i SVA(i) does not overflow. If INFinite entries */ +/* in A are detected, the procedure returns with INFO=-6. */ + + skl = 1. / sqrt((doublereal) (*m) * (doublereal) (*n)); + noscale = TRUE_; + goscale = TRUE_; + + if (lower) { +/* the input matrix is M-by-N lower triangular (trapezoidal) */ + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + aapp = 0.; + aaqq = 1.; + i__2 = *m - p + 1; + zlassq_(&i__2, &a[p + p * a_dim1], &c__1, &aapp, &aaqq); + if (aapp > big) { + *info = -6; + i__2 = -(*info); + xerbla_("ZGESVJ", &i__2, (ftnlen)6); + return 0; + } + aaqq = sqrt(aaqq); + if (aapp < big / aaqq && noscale) { + sva[p] = aapp * aaqq; + } else { + noscale = FALSE_; + sva[p] = aapp * (aaqq * skl); + if (goscale) { + goscale = FALSE_; + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { + sva[q] *= skl; +/* L1873: */ + } + } + } +/* L1874: */ + } + } else if (upper) { +/* the input matrix is M-by-N upper triangular (trapezoidal) */ + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + aapp = 0.; + aaqq = 1.; + zlassq_(&p, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq); + if (aapp > big) { + *info = -6; + i__2 = -(*info); + xerbla_("ZGESVJ", &i__2, (ftnlen)6); + return 0; + } + aaqq = sqrt(aaqq); + if (aapp < big / aaqq && noscale) { + sva[p] = aapp * aaqq; + } else { + noscale = FALSE_; + sva[p] = aapp * (aaqq * skl); + if (goscale) { + goscale = FALSE_; + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { + sva[q] *= skl; +/* L2873: */ + } + } + } +/* L2874: */ + } + } else { +/* the input matrix is M-by-N general dense */ + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + aapp = 0.; + aaqq = 1.; + zlassq_(m, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq); + if (aapp > big) { + *info = -6; + i__2 = -(*info); + xerbla_("ZGESVJ", &i__2, (ftnlen)6); + return 0; + } + aaqq = sqrt(aaqq); + if (aapp < big / aaqq && noscale) { + sva[p] = aapp * aaqq; + } else { + noscale = FALSE_; + sva[p] = aapp * (aaqq * skl); + if (goscale) { + goscale = FALSE_; + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { + sva[q] *= skl; +/* L3873: */ + } + } + } +/* L3874: */ + } + } + + if (noscale) { + skl = 1.; + } + +/* Move the smaller part of the spectrum from the underflow threshold */ +/* (!) Start by determining the position of the nonzero entries of the */ +/* array SVA() relative to ( SFMIN, BIG ). */ + + aapp = 0.; + aaqq = big; + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + if (sva[p] != 0.) { +/* Computing MIN */ + d__1 = aaqq, d__2 = sva[p]; + aaqq = f2cmin(d__1,d__2); + } +/* Computing MAX */ + d__1 = aapp, d__2 = sva[p]; + aapp = f2cmax(d__1,d__2); +/* L4781: */ + } + +/* #:) Quick return for zero matrix */ + + if (aapp == 0.) { + if (lsvec) { + zlaset_("G", m, n, &c_b1, &c_b2, &a[a_offset], lda); + } + rwork[1] = 1.; + rwork[2] = 0.; + rwork[3] = 0.; + rwork[4] = 0.; + rwork[5] = 0.; + rwork[6] = 0.; + return 0; + } + +/* #:) Quick return for one-column matrix */ + + if (*n == 1) { + if (lsvec) { + zlascl_("G", &c__0, &c__0, &sva[1], &skl, m, &c__1, &a[a_dim1 + 1] + , lda, &ierr); + } + rwork[1] = 1. / skl; + if (sva[1] >= sfmin) { + rwork[2] = 1.; + } else { + rwork[2] = 0.; + } + rwork[3] = 0.; + rwork[4] = 0.; + rwork[5] = 0.; + rwork[6] = 0.; + return 0; + } + +/* Protect small singular values from underflow, and try to */ +/* avoid underflows/overflows in computing Jacobi rotations. */ + + sn = sqrt(sfmin / epsln); + temp1 = sqrt(big / (doublereal) (*n)); + if (aapp <= sn || aaqq >= temp1 || sn <= aaqq && aapp <= temp1) { +/* Computing MIN */ + d__1 = big, d__2 = temp1 / aapp; + temp1 = f2cmin(d__1,d__2); +/* AAQQ = AAQQ*TEMP1 */ +/* AAPP = AAPP*TEMP1 */ + } else if (aaqq <= sn && aapp <= temp1) { +/* Computing MIN */ + d__1 = sn / aaqq, d__2 = big / (aapp * sqrt((doublereal) (*n))); + temp1 = f2cmin(d__1,d__2); +/* AAQQ = AAQQ*TEMP1 */ +/* AAPP = AAPP*TEMP1 */ + } else if (aaqq >= sn && aapp >= temp1) { +/* Computing MAX */ + d__1 = sn / aaqq, d__2 = temp1 / aapp; + temp1 = f2cmax(d__1,d__2); +/* AAQQ = AAQQ*TEMP1 */ +/* AAPP = AAPP*TEMP1 */ + } else if (aaqq <= sn && aapp >= temp1) { +/* Computing MIN */ + d__1 = sn / aaqq, d__2 = big / (sqrt((doublereal) (*n)) * aapp); + temp1 = f2cmin(d__1,d__2); +/* AAQQ = AAQQ*TEMP1 */ +/* AAPP = AAPP*TEMP1 */ + } else { + temp1 = 1.; + } + +/* Scale, if necessary */ + + if (temp1 != 1.) { + dlascl_("G", &c__0, &c__0, &c_b42, &temp1, n, &c__1, &sva[1], n, & + ierr); + } + skl = temp1 * skl; + if (skl != 1.) { + zlascl_(joba, &c__0, &c__0, &c_b42, &skl, m, n, &a[a_offset], lda, & + ierr); + skl = 1. / skl; + } + +/* Row-cyclic Jacobi SVD algorithm with column pivoting */ + + emptsw = *n * (*n - 1) / 2; + notrot = 0; + i__1 = *n; + for (q = 1; q <= i__1; ++q) { + i__2 = q; + cwork[i__2].r = 1., cwork[i__2].i = 0.; +/* L1868: */ + } + + + + swband = 3; +/* [TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective */ +/* if ZGESVJ is used as a computational routine in the preconditioned */ +/* Jacobi SVD algorithm ZGEJSV. For sweeps i=1:SWBAND the procedure */ +/* works on pivots inside a band-like region around the diagonal. */ +/* The boundaries are determined dynamically, based on the number of */ +/* pivots above a threshold. */ + + kbl = f2cmin(8,*n); +/* [TP] KBL is a tuning parameter that defines the tile size in the */ +/* tiling of the p-q loops of pivot pairs. In general, an optimal */ +/* value of KBL depends on the matrix dimensions and on the */ +/* parameters of the computer's memory. */ + + nbl = *n / kbl; + if (nbl * kbl != *n) { + ++nbl; + } + +/* Computing 2nd power */ + i__1 = kbl; + blskip = i__1 * i__1; +/* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */ + + rowskip = f2cmin(5,kbl); +/* [TP] ROWSKIP is a tuning parameter. */ + + lkahead = 1; +/* [TP] LKAHEAD is a tuning parameter. */ + +/* Quasi block transformations, using the lower (upper) triangular */ +/* structure of the input matrix. The quasi-block-cycling usually */ +/* invokes cubic convergence. Big part of this cycle is done inside */ +/* canonical subspaces of dimensions less than M. */ + +/* Computing MAX */ + i__1 = 64, i__2 = kbl << 2; + if ((lower || upper) && *n > f2cmax(i__1,i__2)) { +/* [TP] The number of partition levels and the actual partition are */ +/* tuning parameters. */ + n4 = *n / 4; + n2 = *n / 2; + n34 = n4 * 3; + if (applv) { + q = 0; + } else { + q = 1; + } + + if (lower) { + +/* This works very well on lower triangular matrices, in particular */ +/* in the framework of the preconditioned Jacobi SVD (xGEJSV). */ +/* The idea is simple: */ +/* [+ 0 0 0] Note that Jacobi transformations of [0 0] */ +/* [+ + 0 0] [0 0] */ +/* [+ + x 0] actually work on [x 0] [x 0] */ +/* [+ + x x] [x x]. [x x] */ + + i__1 = *m - n34; + i__2 = *n - n34; + i__3 = *lwork - *n; + zgsvj0_(jobv, &i__1, &i__2, &a[n34 + 1 + (n34 + 1) * a_dim1], lda, + &cwork[n34 + 1], &sva[n34 + 1], &mvl, &v[n34 * q + 1 + ( + n34 + 1) * v_dim1], ldv, &epsln, &sfmin, &tol, &c__2, & + cwork[*n + 1], &i__3, &ierr); + i__1 = *m - n2; + i__2 = n34 - n2; + i__3 = *lwork - *n; + zgsvj0_(jobv, &i__1, &i__2, &a[n2 + 1 + (n2 + 1) * a_dim1], lda, & + cwork[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + + 1) * v_dim1], ldv, &epsln, &sfmin, &tol, &c__2, &cwork[*n + + 1], &i__3, &ierr); + i__1 = *m - n2; + i__2 = *n - n2; + i__3 = *lwork - *n; + zgsvj1_(jobv, &i__1, &i__2, &n4, &a[n2 + 1 + (n2 + 1) * a_dim1], + lda, &cwork[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + ( + n2 + 1) * v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, & + cwork[*n + 1], &i__3, &ierr); + i__1 = *m - n4; + i__2 = n2 - n4; + i__3 = *lwork - *n; + zgsvj0_(jobv, &i__1, &i__2, &a[n4 + 1 + (n4 + 1) * a_dim1], lda, & + cwork[n4 + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + + 1) * v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &cwork[*n + + 1], &i__3, &ierr); + + i__1 = *lwork - *n; + zgsvj0_(jobv, m, &n4, &a[a_offset], lda, &cwork[1], &sva[1], &mvl, + &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, &cwork[* + n + 1], &i__1, &ierr); + + i__1 = *lwork - *n; + zgsvj1_(jobv, m, &n2, &n4, &a[a_offset], lda, &cwork[1], &sva[1], + &mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, & + cwork[*n + 1], &i__1, &ierr); + + + } else if (upper) { + + + i__1 = *lwork - *n; + zgsvj0_(jobv, &n4, &n4, &a[a_offset], lda, &cwork[1], &sva[1], & + mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__2, & + cwork[*n + 1], &i__1, &ierr); + + i__1 = *lwork - *n; + zgsvj0_(jobv, &n2, &n4, &a[(n4 + 1) * a_dim1 + 1], lda, &cwork[n4 + + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + 1) * + v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &cwork[*n + 1], + &i__1, &ierr); + + i__1 = *lwork - *n; + zgsvj1_(jobv, &n2, &n2, &n4, &a[a_offset], lda, &cwork[1], &sva[1] + , &mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, & + cwork[*n + 1], &i__1, &ierr); + + i__1 = n2 + n4; + i__2 = *lwork - *n; + zgsvj0_(jobv, &i__1, &n4, &a[(n2 + 1) * a_dim1 + 1], lda, &cwork[ + n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + 1) * + v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &cwork[*n + 1], + &i__2, &ierr); + } + + } + + + for (i__ = 1; i__ <= 30; ++i__) { + + + mxaapq = 0.; + mxsinj = 0.; + iswrot = 0; + + notrot = 0; + pskipped = 0; + +/* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs */ +/* 1 <= p < q <= N. This is the first step toward a blocked implementation */ +/* of the rotations. New implementation, based on block transformations, */ +/* is under development. */ + + i__1 = nbl; + for (ibr = 1; ibr <= i__1; ++ibr) { + + igl = (ibr - 1) * kbl + 1; + +/* Computing MIN */ + i__3 = lkahead, i__4 = nbl - ibr; + i__2 = f2cmin(i__3,i__4); + for (ir1 = 0; ir1 <= i__2; ++ir1) { + + igl += ir1 * kbl; + +/* Computing MIN */ + i__4 = igl + kbl - 1, i__5 = *n - 1; + i__3 = f2cmin(i__4,i__5); + for (p = igl; p <= i__3; ++p) { + + + i__4 = *n - p + 1; + q = idamax_(&i__4, &sva[p], &c__1) + p - 1; + if (p != q) { + zswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + + 1], &c__1); + if (rsvec) { + zswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1); + } + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + i__4 = p; + aapq.r = cwork[i__4].r, aapq.i = cwork[i__4].i; + i__4 = p; + i__5 = q; + cwork[i__4].r = cwork[i__5].r, cwork[i__4].i = cwork[ + i__5].i; + i__4 = q; + cwork[i__4].r = aapq.r, cwork[i__4].i = aapq.i; + } + + if (ir1 == 0) { + +/* Column norms are periodically updated by explicit */ +/* norm computation. */ +/* [!] Caveat: */ +/* Unfortunately, some BLAS implementations compute DZNRM2(M,A(1,p),1) */ +/* as SQRT(S=CDOTC(M,A(1,p),1,A(1,p),1)), which may cause the result to */ +/* overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and to */ +/* underflow for ||A(:,p)||_2 < SQRT(underflow_threshold). */ +/* Hence, DZNRM2 cannot be trusted, not even in the case when */ +/* the true norm is far from the under(over)flow boundaries. */ +/* If properly implemented SCNRM2 is available, the IF-THEN-ELSE-END IF */ +/* below should be replaced with "AAPP = DZNRM2( M, A(1,p), 1 )". */ + + if (sva[p] < rootbig && sva[p] > rootsfmin) { + sva[p] = dznrm2_(m, &a[p * a_dim1 + 1], &c__1); + } else { + temp1 = 0.; + aapp = 1.; + zlassq_(m, &a[p * a_dim1 + 1], &c__1, &temp1, & + aapp); + sva[p] = temp1 * sqrt(aapp); + } + aapp = sva[p]; + } else { + aapp = sva[p]; + } + + if (aapp > 0.) { + + pskipped = 0; + +/* Computing MIN */ + i__5 = igl + kbl - 1; + i__4 = f2cmin(i__5,*n); + for (q = p + 1; q <= i__4; ++q) { + + aaqq = sva[q]; + + if (aaqq > 0.) { + + aapp0 = aapp; + if (aaqq >= 1.) { + rotok = small * aapp <= aaqq; + if (aapp < big / aaqq) { + zdotc_(&z__3, m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + z__2.r = z__3.r / aaqq, z__2.i = + z__3.i / aaqq; + z__1.r = z__2.r / aapp, z__1.i = + z__2.i / aapp; + aapq.r = z__1.r, aapq.i = z__1.i; + } else { + zcopy_(m, &a[p * a_dim1 + 1], &c__1, & + cwork[*n + 1], &c__1); + zlascl_("G", &c__0, &c__0, &aapp, & + c_b42, m, &c__1, &cwork[*n + + 1], lda, &ierr); + zdotc_(&z__2, m, &cwork[*n + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + z__1.r = z__2.r / aaqq, z__1.i = + z__2.i / aaqq; + aapq.r = z__1.r, aapq.i = z__1.i; + } + } else { + rotok = aapp <= aaqq / small; + if (aapp > small / aaqq) { + zdotc_(&z__3, m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + z__2.r = z__3.r / aapp, z__2.i = + z__3.i / aapp; + z__1.r = z__2.r / aaqq, z__1.i = + z__2.i / aaqq; + aapq.r = z__1.r, aapq.i = z__1.i; + } else { + zcopy_(m, &a[q * a_dim1 + 1], &c__1, & + cwork[*n + 1], &c__1); + zlascl_("G", &c__0, &c__0, &aaqq, & + c_b42, m, &c__1, &cwork[*n + + 1], lda, &ierr); + zdotc_(&z__2, m, &a[p * a_dim1 + 1], & + c__1, &cwork[*n + 1], &c__1); + z__1.r = z__2.r / aapp, z__1.i = + z__2.i / aapp; + aapq.r = z__1.r, aapq.i = z__1.i; + } + } + +/* AAPQ = AAPQ * CONJG( CWORK(p) ) * CWORK(q) */ + aapq1 = -z_abs(&aapq); +/* Computing MAX */ + d__1 = mxaapq, d__2 = -aapq1; + mxaapq = f2cmax(d__1,d__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq1) > tol) { + d__1 = z_abs(&aapq); + z__1.r = aapq.r / d__1, z__1.i = aapq.i / + d__1; + ompq.r = z__1.r, ompq.i = z__1.i; + +/* [RTD] ROTATED = ROTATED + ONE */ + + if (ir1 == 0) { + notrot = 0; + pskipped = 0; + ++iswrot; + } + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (d__1 = aqoap - apoaq, abs( + d__1)) * -.5 / aapq1; + + if (abs(theta) > bigtheta) { + + t = .5 / theta; + cs = 1.; + d_cnjg(&z__2, &ompq); + z__1.r = t * z__2.r, z__1.i = t * + z__2.i; + zrot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, &cs, &z__1); + if (rsvec) { + d_cnjg(&z__2, &ompq); + z__1.r = t * z__2.r, z__1.i = t * z__2.i; + zrot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, &cs, &z__1); + } +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq1 + 1.; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq1; + aapp *= sqrt((f2cmax(d__1,d__2))); +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(t); + mxsinj = f2cmax(d__1,d__2); + + } else { + + + thsign = -d_sign(&c_b42, &aapq1); + t = 1. / (theta + thsign * sqrt( + theta * theta + 1.)); + cs = sqrt(1. / (t * t + 1.)); + sn = t * cs; + +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(sn); + mxsinj = f2cmax(d__1,d__2); +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq1 + 1.; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq1; + aapp *= sqrt((f2cmax(d__1,d__2))); + + d_cnjg(&z__2, &ompq); + z__1.r = sn * z__2.r, z__1.i = sn + * z__2.i; + zrot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, &cs, &z__1); + if (rsvec) { + d_cnjg(&z__2, &ompq); + z__1.r = sn * z__2.r, z__1.i = sn * z__2.i; + zrot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, &cs, &z__1); + } + } + i__5 = p; + i__6 = q; + z__2.r = -cwork[i__6].r, z__2.i = + -cwork[i__6].i; + z__1.r = z__2.r * ompq.r - z__2.i * + ompq.i, z__1.i = z__2.r * + ompq.i + z__2.i * ompq.r; + cwork[i__5].r = z__1.r, cwork[i__5].i + = z__1.i; + + } else { + zcopy_(m, &a[p * a_dim1 + 1], &c__1, & + cwork[*n + 1], &c__1); + zlascl_("G", &c__0, &c__0, &aapp, & + c_b42, m, &c__1, &cwork[*n + + 1], lda, &ierr); + zlascl_("G", &c__0, &c__0, &aaqq, & + c_b42, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + z__1.r = -aapq.r, z__1.i = -aapq.i; + zaxpy_(m, &z__1, &cwork[*n + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + zlascl_("G", &c__0, &c__0, &c_b42, & + aaqq, m, &c__1, &a[q * a_dim1 + + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq1 * aapq1; + sva[q] = aaqq * sqrt((f2cmax(d__1,d__2))) + ; + mxsinj = f2cmax(mxsinj,sfmin); + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q), SVA(p) */ +/* recompute SVA(q), SVA(p). */ + +/* Computing 2nd power */ + d__1 = sva[q] / aaqq; + if (d__1 * d__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = dznrm2_(m, &a[q * a_dim1 + + 1], &c__1); + } else { + t = 0.; + aaqq = 1.; + zlassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq); + } + } + if (aapp / aapp0 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = dznrm2_(m, &a[p * a_dim1 + + 1], &c__1); + } else { + t = 0.; + aapp = 1.; + zlassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp); + } + sva[p] = aapp; + } + + } else { +/* A(:,p) and A(:,q) already numerically orthogonal */ + if (ir1 == 0) { + ++notrot; + } +/* [RTD] SKIPPED = SKIPPED + 1 */ + ++pskipped; + } + } else { +/* A(:,q) is zero column */ + if (ir1 == 0) { + ++notrot; + } + ++pskipped; + } + + if (i__ <= swband && pskipped > rowskip) { + if (ir1 == 0) { + aapp = -aapp; + } + notrot = 0; + goto L2103; + } + +/* L2002: */ + } +/* END q-LOOP */ + +L2103: +/* bailed out of q-loop */ + + sva[p] = aapp; + + } else { + sva[p] = aapp; + if (ir1 == 0 && aapp == 0.) { +/* Computing MIN */ + i__4 = igl + kbl - 1; + notrot = notrot + f2cmin(i__4,*n) - p; + } + } + +/* L2001: */ + } +/* end of the p-loop */ +/* end of doing the block ( ibr, ibr ) */ +/* L1002: */ + } +/* end of ir1-loop */ + +/* ... go to the off diagonal blocks */ + + igl = (ibr - 1) * kbl + 1; + + i__2 = nbl; + for (jbc = ibr + 1; jbc <= i__2; ++jbc) { + + jgl = (jbc - 1) * kbl + 1; + +/* doing the block at ( ibr, jbc ) */ + + ijblsk = 0; +/* Computing MIN */ + i__4 = igl + kbl - 1; + i__3 = f2cmin(i__4,*n); + for (p = igl; p <= i__3; ++p) { + + aapp = sva[p]; + if (aapp > 0.) { + + pskipped = 0; + +/* Computing MIN */ + i__5 = jgl + kbl - 1; + i__4 = f2cmin(i__5,*n); + for (q = jgl; q <= i__4; ++q) { + + aaqq = sva[q]; + if (aaqq > 0.) { + aapp0 = aapp; + + +/* Safe Gram matrix computation */ + + if (aaqq >= 1.) { + if (aapp >= aaqq) { + rotok = small * aapp <= aaqq; + } else { + rotok = small * aaqq <= aapp; + } + if (aapp < big / aaqq) { + zdotc_(&z__3, m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + z__2.r = z__3.r / aaqq, z__2.i = + z__3.i / aaqq; + z__1.r = z__2.r / aapp, z__1.i = + z__2.i / aapp; + aapq.r = z__1.r, aapq.i = z__1.i; + } else { + zcopy_(m, &a[p * a_dim1 + 1], &c__1, & + cwork[*n + 1], &c__1); + zlascl_("G", &c__0, &c__0, &aapp, & + c_b42, m, &c__1, &cwork[*n + + 1], lda, &ierr); + zdotc_(&z__2, m, &cwork[*n + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + z__1.r = z__2.r / aaqq, z__1.i = + z__2.i / aaqq; + aapq.r = z__1.r, aapq.i = z__1.i; + } + } else { + if (aapp >= aaqq) { + rotok = aapp <= aaqq / small; + } else { + rotok = aaqq <= aapp / small; + } + if (aapp > small / aaqq) { + zdotc_(&z__3, m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + d__1 = f2cmax(aaqq,aapp); + z__2.r = z__3.r / d__1, z__2.i = + z__3.i / d__1; + d__2 = f2cmin(aaqq,aapp); + z__1.r = z__2.r / d__2, z__1.i = + z__2.i / d__2; + aapq.r = z__1.r, aapq.i = z__1.i; + } else { + zcopy_(m, &a[q * a_dim1 + 1], &c__1, & + cwork[*n + 1], &c__1); + zlascl_("G", &c__0, &c__0, &aaqq, & + c_b42, m, &c__1, &cwork[*n + + 1], lda, &ierr); + zdotc_(&z__2, m, &a[p * a_dim1 + 1], & + c__1, &cwork[*n + 1], &c__1); + z__1.r = z__2.r / aapp, z__1.i = + z__2.i / aapp; + aapq.r = z__1.r, aapq.i = z__1.i; + } + } + +/* AAPQ = AAPQ * CONJG(CWORK(p))*CWORK(q) */ + aapq1 = -z_abs(&aapq); +/* Computing MAX */ + d__1 = mxaapq, d__2 = -aapq1; + mxaapq = f2cmax(d__1,d__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq1) > tol) { + d__1 = z_abs(&aapq); + z__1.r = aapq.r / d__1, z__1.i = aapq.i / + d__1; + ompq.r = z__1.r, ompq.i = z__1.i; + notrot = 0; +/* [RTD] ROTATED = ROTATED + 1 */ + pskipped = 0; + ++iswrot; + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (d__1 = aqoap - apoaq, abs( + d__1)) * -.5 / aapq1; + if (aaqq > aapp0) { + theta = -theta; + } + + if (abs(theta) > bigtheta) { + t = .5 / theta; + cs = 1.; + d_cnjg(&z__2, &ompq); + z__1.r = t * z__2.r, z__1.i = t * + z__2.i; + zrot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, &cs, &z__1); + if (rsvec) { + d_cnjg(&z__2, &ompq); + z__1.r = t * z__2.r, z__1.i = t * z__2.i; + zrot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, &cs, &z__1); + } +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq1 + 1.; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq1; + aapp *= sqrt((f2cmax(d__1,d__2))); +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(t); + mxsinj = f2cmax(d__1,d__2); + } else { + + + thsign = -d_sign(&c_b42, &aapq1); + if (aaqq > aapp0) { + thsign = -thsign; + } + t = 1. / (theta + thsign * sqrt( + theta * theta + 1.)); + cs = sqrt(1. / (t * t + 1.)); + sn = t * cs; +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(sn); + mxsinj = f2cmax(d__1,d__2); +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq1 + 1.; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq1; + aapp *= sqrt((f2cmax(d__1,d__2))); + + d_cnjg(&z__2, &ompq); + z__1.r = sn * z__2.r, z__1.i = sn + * z__2.i; + zrot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, &cs, &z__1); + if (rsvec) { + d_cnjg(&z__2, &ompq); + z__1.r = sn * z__2.r, z__1.i = sn * z__2.i; + zrot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, &cs, &z__1); + } + } + i__5 = p; + i__6 = q; + z__2.r = -cwork[i__6].r, z__2.i = + -cwork[i__6].i; + z__1.r = z__2.r * ompq.r - z__2.i * + ompq.i, z__1.i = z__2.r * + ompq.i + z__2.i * ompq.r; + cwork[i__5].r = z__1.r, cwork[i__5].i + = z__1.i; + + } else { + if (aapp > aaqq) { + zcopy_(m, &a[p * a_dim1 + 1], & + c__1, &cwork[*n + 1], & + c__1); + zlascl_("G", &c__0, &c__0, &aapp, + &c_b42, m, &c__1, &cwork[* + n + 1], lda, &ierr); + zlascl_("G", &c__0, &c__0, &aaqq, + &c_b42, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + z__1.r = -aapq.r, z__1.i = + -aapq.i; + zaxpy_(m, &z__1, &cwork[*n + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1); + zlascl_("G", &c__0, &c__0, &c_b42, + &aaqq, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq1 * + aapq1; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); + mxsinj = f2cmax(mxsinj,sfmin); + } else { + zcopy_(m, &a[q * a_dim1 + 1], & + c__1, &cwork[*n + 1], & + c__1); + zlascl_("G", &c__0, &c__0, &aaqq, + &c_b42, m, &c__1, &cwork[* + n + 1], lda, &ierr); + zlascl_("G", &c__0, &c__0, &aapp, + &c_b42, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); + d_cnjg(&z__2, &aapq); + z__1.r = -z__2.r, z__1.i = + -z__2.i; + zaxpy_(m, &z__1, &cwork[*n + 1], & + c__1, &a[p * a_dim1 + 1], + &c__1); + zlascl_("G", &c__0, &c__0, &c_b42, + &aapp, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq1 * + aapq1; + sva[p] = aapp * sqrt((f2cmax(d__1, + d__2))); + mxsinj = f2cmax(mxsinj,sfmin); + } + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q), SVA(p) */ +/* Computing 2nd power */ + d__1 = sva[q] / aaqq; + if (d__1 * d__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = dznrm2_(m, &a[q * a_dim1 + + 1], &c__1); + } else { + t = 0.; + aaqq = 1.; + zlassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq); + } + } +/* Computing 2nd power */ + d__1 = aapp / aapp0; + if (d__1 * d__1 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = dznrm2_(m, &a[p * a_dim1 + + 1], &c__1); + } else { + t = 0.; + aapp = 1.; + zlassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp); + } + sva[p] = aapp; + } +/* end of OK rotation */ + } else { + ++notrot; +/* [RTD] SKIPPED = SKIPPED + 1 */ + ++pskipped; + ++ijblsk; + } + } else { + ++notrot; + ++pskipped; + ++ijblsk; + } + + if (i__ <= swband && ijblsk >= blskip) { + sva[p] = aapp; + notrot = 0; + goto L2011; + } + if (i__ <= swband && pskipped > rowskip) { + aapp = -aapp; + notrot = 0; + goto L2203; + } + +/* L2200: */ + } +/* end of the q-loop */ +L2203: + + sva[p] = aapp; + + } else { + + if (aapp == 0.) { +/* Computing MIN */ + i__4 = jgl + kbl - 1; + notrot = notrot + f2cmin(i__4,*n) - jgl + 1; + } + if (aapp < 0.) { + notrot = 0; + } + + } + +/* L2100: */ + } +/* end of the p-loop */ +/* L2010: */ + } +/* end of the jbc-loop */ +L2011: +/* 2011 bailed out of the jbc-loop */ +/* Computing MIN */ + i__3 = igl + kbl - 1; + i__2 = f2cmin(i__3,*n); + for (p = igl; p <= i__2; ++p) { + sva[p] = (d__1 = sva[p], abs(d__1)); +/* L2012: */ + } +/* ** */ +/* L2000: */ + } +/* 2000 :: end of the ibr-loop */ + + if (sva[*n] < rootbig && sva[*n] > rootsfmin) { + sva[*n] = dznrm2_(m, &a[*n * a_dim1 + 1], &c__1); + } else { + t = 0.; + aapp = 1.; + zlassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp); + sva[*n] = t * sqrt(aapp); + } + +/* Additional steering devices */ + + if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) { + swband = i__; + } + + if (i__ > swband + 1 && mxaapq < sqrt((doublereal) (*n)) * tol && ( + doublereal) (*n) * mxaapq * mxsinj < tol) { + goto L1994; + } + + if (notrot >= emptsw) { + goto L1994; + } + +/* L1993: */ + } +/* end i=1:NSWEEP loop */ + +/* #:( Reaching this point means that the procedure has not converged. */ + *info = 29; + goto L1995; + +L1994: +/* #:) Reaching this point means numerical convergence after the i-th */ +/* sweep. */ + + *info = 0; +/* #:) INFO = 0 confirms successful iterations. */ +L1995: + +/* Sort the singular values and find how many are above */ +/* the underflow threshold. */ + + n2 = 0; + n4 = 0; + i__1 = *n - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + q = idamax_(&i__2, &sva[p], &c__1) + p - 1; + if (p != q) { + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + zswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); + if (rsvec) { + zswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } + if (sva[p] != 0.) { + ++n4; + if (sva[p] * skl > sfmin) { + ++n2; + } + } +/* L5991: */ + } + if (sva[*n] != 0.) { + ++n4; + if (sva[*n] * skl > sfmin) { + ++n2; + } + } + +/* Normalize the left singular vectors. */ + + if (lsvec || uctol) { + i__1 = n4; + for (p = 1; p <= i__1; ++p) { +/* CALL ZDSCAL( M, ONE / SVA( p ), A( 1, p ), 1 ) */ + zlascl_("G", &c__0, &c__0, &sva[p], &c_b42, m, &c__1, &a[p * + a_dim1 + 1], m, &ierr); +/* L1998: */ + } + } + +/* Scale the product of Jacobi rotations. */ + + if (rsvec) { + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + temp1 = 1. / dznrm2_(&mvl, &v[p * v_dim1 + 1], &c__1); + zdscal_(&mvl, &temp1, &v[p * v_dim1 + 1], &c__1); +/* L2399: */ + } + } + +/* Undo scaling, if necessary (and possible). */ + if (skl > 1. && sva[1] < big / skl || skl < 1. && sva[f2cmax(n2,1)] > sfmin / + skl) { + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + sva[p] = skl * sva[p]; +/* L2400: */ + } + skl = 1.; + } + + rwork[1] = skl; +/* The singular values of A are SKL*SVA(1:N). If SKL.NE.ONE */ +/* then some of the singular values may overflow or underflow and */ +/* the spectrum is given in this factored representation. */ + + rwork[2] = (doublereal) n4; +/* N4 is the number of computed nonzero singular values of A. */ + + rwork[3] = (doublereal) n2; +/* N2 is the number of singular values of A greater than SFMIN. */ +/* If N2 +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGESVX computes the solution to system of linear equations A * X = B for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGESVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, */ +/* EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, */ +/* WORK, RWORK, INFO ) */ + +/* CHARACTER EQUED, FACT, TRANS */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS */ +/* DOUBLE PRECISION RCOND */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION BERR( * ), C( * ), FERR( * ), R( * ), */ +/* $ RWORK( * ) */ +/* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGESVX uses the LU factorization to compute the solution to a complex */ +/* > system of linear equations */ +/* > A * X = B, */ +/* > where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */ +/* > */ +/* > Error bounds on the solution and a condition estimate are also */ +/* > provided. */ +/* > \endverbatim */ + +/* > \par Description: */ +/* ================= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The following steps are performed: */ +/* > */ +/* > 1. If FACT = 'E', real scaling factors are computed to equilibrate */ +/* > the system: */ +/* > TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B */ +/* > TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */ +/* > TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */ +/* > Whether or not the system will be equilibrated depends on the */ +/* > scaling of the matrix A, but if equilibration is used, A is */ +/* > overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */ +/* > or diag(C)*B (if TRANS = 'T' or 'C'). */ +/* > */ +/* > 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the */ +/* > matrix A (after equilibration if FACT = 'E') as */ +/* > A = P * L * U, */ +/* > where P is a permutation matrix, L is a unit lower triangular */ +/* > matrix, and U is upper triangular. */ +/* > */ +/* > 3. If some U(i,i)=0, so that U is exactly singular, then the routine */ +/* > returns with INFO = i. Otherwise, the factored form of A is used */ +/* > to estimate the condition number of the matrix A. If the */ +/* > reciprocal of the condition number is less than machine precision, */ +/* > INFO = N+1 is returned as a warning, but the routine still goes on */ +/* > to solve for X and compute error bounds as described below. */ +/* > */ +/* > 4. The system of equations is solved for X using the factored form */ +/* > of A. */ +/* > */ +/* > 5. Iterative refinement is applied to improve the computed solution */ +/* > matrix and calculate error bounds and backward error estimates */ +/* > for it. */ +/* > */ +/* > 6. If equilibration was used, the matrix X is premultiplied by */ +/* > diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */ +/* > that it solves the original system before equilibration. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] FACT */ +/* > \verbatim */ +/* > FACT is CHARACTER*1 */ +/* > Specifies whether or not the factored form of the matrix A is */ +/* > supplied on entry, and if not, whether the matrix A should be */ +/* > equilibrated before it is factored. */ +/* > = 'F': On entry, AF and IPIV contain the factored form of A. */ +/* > If EQUED is not 'N', the matrix A has been */ +/* > equilibrated with scaling factors given by R and C. */ +/* > A, AF, and IPIV are not modified. */ +/* > = 'N': The matrix A will be copied to AF and factored. */ +/* > = 'E': The matrix A will be equilibrated if necessary, then */ +/* > copied to AF and factored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is */ +/* > not 'N', then A must have been equilibrated by the scaling */ +/* > factors in R and/or C. A is not modified if FACT = 'F' or */ +/* > 'N', or if FACT = 'E' and EQUED = 'N' on exit. */ +/* > */ +/* > On exit, if EQUED .ne. 'N', A is scaled as follows: */ +/* > EQUED = 'R': A := diag(R) * A */ +/* > EQUED = 'C': A := A * diag(C) */ +/* > EQUED = 'B': A := diag(R) * A * diag(C). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AF */ +/* > \verbatim */ +/* > AF is COMPLEX*16 array, dimension (LDAF,N) */ +/* > If FACT = 'F', then AF is an input argument and on entry */ +/* > contains the factors L and U from the factorization */ +/* > A = P*L*U as computed by ZGETRF. If EQUED .ne. 'N', then */ +/* > AF is the factored form of the equilibrated matrix A. */ +/* > */ +/* > If FACT = 'N', then AF is an output argument and on exit */ +/* > returns the factors L and U from the factorization A = P*L*U */ +/* > of the original matrix A. */ +/* > */ +/* > If FACT = 'E', then AF is an output argument and on exit */ +/* > returns the factors L and U from the factorization A = P*L*U */ +/* > of the equilibrated matrix A (see the description of A for */ +/* > the form of the equilibrated matrix). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > If FACT = 'F', then IPIV is an input argument and on entry */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > as computed by ZGETRF; row i of the matrix was interchanged */ +/* > with row IPIV(i). */ +/* > */ +/* > If FACT = 'N', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > of the original matrix A. */ +/* > */ +/* > If FACT = 'E', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > of the equilibrated matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] EQUED */ +/* > \verbatim */ +/* > EQUED is CHARACTER*1 */ +/* > Specifies the form of equilibration that was done. */ +/* > = 'N': No equilibration (always true if FACT = 'N'). */ +/* > = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* > diag(R). */ +/* > = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* > by diag(C). */ +/* > = 'B': Both row and column equilibration, i.e., A has been */ +/* > replaced by diag(R) * A * diag(C). */ +/* > EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* > output argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] R */ +/* > \verbatim */ +/* > R is DOUBLE PRECISION array, dimension (N) */ +/* > The row scale factors for A. If EQUED = 'R' or 'B', A is */ +/* > multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ +/* > is not accessed. R is an input argument if FACT = 'F'; */ +/* > otherwise, R is an output argument. If FACT = 'F' and */ +/* > EQUED = 'R' or 'B', each element of R must be positive. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (N) */ +/* > The column scale factors for A. If EQUED = 'C' or 'B', A is */ +/* > multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ +/* > is not accessed. C is an input argument if FACT = 'F'; */ +/* > otherwise, C is an output argument. If FACT = 'F' and */ +/* > EQUED = 'C' or 'B', each element of C must be positive. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, */ +/* > if EQUED = 'N', B is not modified; */ +/* > if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */ +/* > diag(R)*B; */ +/* > if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */ +/* > overwritten by diag(C)*B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension (LDX,NRHS) */ +/* > If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X */ +/* > to the original system of equations. Note that A and B are */ +/* > modified on exit if EQUED .ne. 'N', and the solution to the */ +/* > equilibrated system is inv(diag(C))*X if TRANS = 'N' and */ +/* > EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' */ +/* > and EQUED = 'R' or 'B'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > The estimate of the reciprocal condition number of the matrix */ +/* > A after equilibration (if done). If RCOND is less than the */ +/* > machine precision (in particular, if RCOND = 0), the matrix */ +/* > is singular to working precision. This condition is */ +/* > indicated by a return code of INFO > 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (2*N) */ +/* > On exit, RWORK(1) contains the reciprocal pivot growth */ +/* > factor norm(A)/norm(U). The "f2cmax absolute element" norm is */ +/* > used. If RWORK(1) is much less than 1, then the stability */ +/* > of the LU factorization of the (equilibrated) matrix A */ +/* > could be poor. This also means that the solution X, condition */ +/* > estimator RCOND, and forward error bound FERR could be */ +/* > unreliable. If factorization fails with 0 RWORK(1) contains the reciprocal pivot growth factor for the */ +/* > leading INFO columns of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is */ +/* > <= N: U(i,i) is exactly zero. The factorization has */ +/* > been completed, but the factor U is exactly */ +/* > singular, so the solution and error bounds */ +/* > could not be computed. RCOND = 0 is returned. */ +/* > = N+1: U is nonsingular, but RCOND is less than machine */ +/* > precision, meaning that the matrix is singular */ +/* > to working precision. Nevertheless, the */ +/* > solution and error bounds are computed because */ +/* > there are a number of situations where the */ +/* > computed solution can be more accurate than the */ +/* > value of RCOND would suggest. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup complex16GEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int zgesvx_(char *fact, char *trans, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex * + work, doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2; + doublecomplex z__1; + + /* Local variables */ + doublereal amax; + char norm[1]; + integer i__, j; + extern logical lsame_(char *, char *); + doublereal rcmin, rcmax, anorm; + logical equil; + extern doublereal dlamch_(char *); + doublereal colcnd; + logical nofact; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + doublereal bignum; + extern /* Subroutine */ int zlaqge_(integer *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublereal *, doublereal * + , doublereal *, char *), zgecon_(char *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, doublereal *, integer *); + integer infequ; + logical colequ; + doublereal rowcnd; + extern /* Subroutine */ int zgeequ_(integer *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublereal *, doublereal * + , doublereal *, integer *); + logical notran; + extern /* Subroutine */ int zgerfs_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, doublereal *, doublecomplex *, doublereal *, + integer *), zgetrf_(integer *, integer *, doublecomplex *, + integer *, integer *, integer *), zlacpy_(char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *); + extern doublereal zlantr_(char *, char *, char *, integer *, integer *, + doublecomplex *, integer *, doublereal *); + doublereal smlnum; + extern /* Subroutine */ int zgetrs_(char *, integer *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + integer *); + logical rowequ; + doublereal rpvgrw; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + --r__; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --rwork; + + /* Function Body */ + *info = 0; + nofact = lsame_(fact, "N"); + equil = lsame_(fact, "E"); + notran = lsame_(trans, "N"); + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rowequ = FALSE_; + colequ = FALSE_; + } else { + rowequ = lsame_(equed, "R") || lsame_(equed, + "B"); + colequ = lsame_(equed, "C") || lsame_(equed, + "B"); + smlnum = dlamch_("Safe minimum"); + bignum = 1. / smlnum; + } + +/* Test the input parameters. */ + + if (! nofact && ! equil && ! lsame_(fact, "F")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -8; + } else if (lsame_(fact, "F") && ! (rowequ || colequ + || lsame_(equed, "N"))) { + *info = -10; + } else { + if (rowequ) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[j]; + rcmin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[j]; + rcmax = f2cmax(d__1,d__2); +/* L10: */ + } + if (rcmin <= 0.) { + *info = -11; + } else if (*n > 0) { + rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } else { + rowcnd = 1.; + } + } + if (colequ && *info == 0) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = f2cmax(d__1,d__2); +/* L20: */ + } + if (rcmin <= 0.) { + *info = -12; + } else if (*n > 0) { + colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } else { + colcnd = 1.; + } + } + if (*info == 0) { + if (*ldb < f2cmax(1,*n)) { + *info = -14; + } else if (*ldx < f2cmax(1,*n)) { + *info = -16; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGESVX", &i__1, (ftnlen)6); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + zgeequ_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &colcnd, & + amax, &infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + zlaqge_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, & + colcnd, &amax, equed); + rowequ = lsame_(equed, "R") || lsame_(equed, + "B"); + colequ = lsame_(equed, "C") || lsame_(equed, + "B"); + } + } + +/* Scale the right hand side. */ + + if (notran) { + if (rowequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__; + i__5 = i__ + j * b_dim1; + z__1.r = r__[i__4] * b[i__5].r, z__1.i = r__[i__4] * b[ + i__5].i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L30: */ + } +/* L40: */ + } + } + } else if (colequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__; + i__5 = i__ + j * b_dim1; + z__1.r = c__[i__4] * b[i__5].r, z__1.i = c__[i__4] * b[i__5] + .i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L50: */ + } +/* L60: */ + } + } + + if (nofact || equil) { + +/* Compute the LU factorization of A. */ + + zlacpy_("Full", n, n, &a[a_offset], lda, &af[af_offset], ldaf); + zgetrf_(n, n, &af[af_offset], ldaf, &ipiv[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + +/* Compute the reciprocal pivot growth factor of the */ +/* leading rank-deficient INFO columns of A. */ + + rpvgrw = zlantr_("M", "U", "N", info, info, &af[af_offset], ldaf, + &rwork[1]); + if (rpvgrw == 0.) { + rpvgrw = 1.; + } else { + rpvgrw = zlange_("M", n, info, &a[a_offset], lda, &rwork[1]) / rpvgrw; + } + rwork[1] = rpvgrw; + *rcond = 0.; + return 0; + } + } + +/* Compute the norm of the matrix A and the */ +/* reciprocal pivot growth factor RPVGRW. */ + + if (notran) { + *(unsigned char *)norm = '1'; + } else { + *(unsigned char *)norm = 'I'; + } + anorm = zlange_(norm, n, n, &a[a_offset], lda, &rwork[1]); + rpvgrw = zlantr_("M", "U", "N", n, n, &af[af_offset], ldaf, &rwork[1]); + if (rpvgrw == 0.) { + rpvgrw = 1.; + } else { + rpvgrw = zlange_("M", n, n, &a[a_offset], lda, &rwork[1]) / + rpvgrw; + } + +/* Compute the reciprocal of the condition number of A. */ + + zgecon_(norm, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &rwork[1], + info); + +/* Compute the solution matrix X. */ + + zlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + zgetrs_(trans, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, + info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + zgerfs_(trans, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &ipiv[1], + &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[ + 1], &rwork[1], info); + +/* Transform the solution matrix X to a solution of the original */ +/* system. */ + + if (notran) { + if (colequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * x_dim1; + i__4 = i__; + i__5 = i__ + j * x_dim1; + z__1.r = c__[i__4] * x[i__5].r, z__1.i = c__[i__4] * x[ + i__5].i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; +/* L70: */ + } +/* L80: */ + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] /= colcnd; +/* L90: */ + } + } + } else if (rowequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * x_dim1; + i__4 = i__; + i__5 = i__ + j * x_dim1; + z__1.r = r__[i__4] * x[i__5].r, z__1.i = r__[i__4] * x[i__5] + .i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; +/* L100: */ + } +/* L110: */ + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] /= rowcnd; +/* L120: */ + } + } + +/* Set INFO = N+1 if the matrix is singular to working precision. */ + + if (*rcond < dlamch_("Epsilon")) { + *info = *n + 1; + } + + rwork[1] = rpvgrw; + return 0; + +/* End of ZGESVX */ + +} /* zgesvx_ */ + diff --git a/lapack-netlib/SRC/zgesvxx.c b/lapack-netlib/SRC/zgesvxx.c new file mode 100644 index 000000000..061ba9e13 --- /dev/null +++ b/lapack-netlib/SRC/zgesvxx.c @@ -0,0 +1,1214 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGESVXX computes the solution to system of linear equations A * X = B for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGESVXX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGESVXX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, */ +/* EQUED, R, C, B, LDB, X, LDX, RCOND, RPVGRW, */ +/* BERR, N_ERR_BNDS, ERR_BNDS_NORM, */ +/* ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, RWORK, */ +/* INFO ) */ + +/* CHARACTER EQUED, FACT, TRANS */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, */ +/* $ N_ERR_BNDS */ +/* DOUBLE PRECISION RCOND, RPVGRW */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ X( LDX , * ),WORK( * ) */ +/* DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), */ +/* $ ERR_BNDS_NORM( NRHS, * ), */ +/* $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGESVXX uses the LU factorization to compute the solution to a */ +/* > complex*16 system of linear equations A * X = B, where A is an */ +/* > N-by-N matrix and X and B are N-by-NRHS matrices. */ +/* > */ +/* > If requested, both normwise and maximum componentwise error bounds */ +/* > are returned. ZGESVXX will return a solution with a tiny */ +/* > guaranteed error (O(eps) where eps is the working machine */ +/* > precision) unless the matrix is very ill-conditioned, in which */ +/* > case a warning is returned. Relevant condition numbers also are */ +/* > calculated and returned. */ +/* > */ +/* > ZGESVXX accepts user-provided factorizations and equilibration */ +/* > factors; see the definitions of the FACT and EQUED options. */ +/* > Solving with refinement and using a factorization from a previous */ +/* > ZGESVXX call will also produce a solution with either O(eps) */ +/* > errors or warnings, but we cannot make that claim for general */ +/* > user-provided factorizations and equilibration factors if they */ +/* > differ from what ZGESVXX would itself produce. */ +/* > \endverbatim */ + +/* > \par Description: */ +/* ================= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The following steps are performed: */ +/* > */ +/* > 1. If FACT = 'E', double precision scaling factors are computed to equilibrate */ +/* > the system: */ +/* > */ +/* > TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B */ +/* > TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */ +/* > TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */ +/* > */ +/* > Whether or not the system will be equilibrated depends on the */ +/* > scaling of the matrix A, but if equilibration is used, A is */ +/* > overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */ +/* > or diag(C)*B (if TRANS = 'T' or 'C'). */ +/* > */ +/* > 2. If FACT = 'N' or 'E', the LU decomposition is used to factor */ +/* > the matrix A (after equilibration if FACT = 'E') as */ +/* > */ +/* > A = P * L * U, */ +/* > */ +/* > where P is a permutation matrix, L is a unit lower triangular */ +/* > matrix, and U is upper triangular. */ +/* > */ +/* > 3. If some U(i,i)=0, so that U is exactly singular, then the */ +/* > routine returns with INFO = i. Otherwise, the factored form of A */ +/* > is used to estimate the condition number of the matrix A (see */ +/* > argument RCOND). If the reciprocal of the condition number is less */ +/* > than machine precision, the routine still goes on to solve for X */ +/* > and compute error bounds as described below. */ +/* > */ +/* > 4. The system of equations is solved for X using the factored form */ +/* > of A. */ +/* > */ +/* > 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */ +/* > the routine will use iterative refinement to try to get a small */ +/* > error and error bounds. Refinement calculates the residual to at */ +/* > least twice the working precision. */ +/* > */ +/* > 6. If equilibration was used, the matrix X is premultiplied by */ +/* > diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */ +/* > that it solves the original system before equilibration. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \verbatim */ +/* > Some optional parameters are bundled in the PARAMS array. These */ +/* > settings determine how refinement is performed, but often the */ +/* > defaults are acceptable. If the defaults are acceptable, users */ +/* > can pass NPARAMS = 0 which prevents the source code from accessing */ +/* > the PARAMS argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] FACT */ +/* > \verbatim */ +/* > FACT is CHARACTER*1 */ +/* > Specifies whether or not the factored form of the matrix A is */ +/* > supplied on entry, and if not, whether the matrix A should be */ +/* > equilibrated before it is factored. */ +/* > = 'F': On entry, AF and IPIV contain the factored form of A. */ +/* > If EQUED is not 'N', the matrix A has been */ +/* > equilibrated with scaling factors given by R and C. */ +/* > A, AF, and IPIV are not modified. */ +/* > = 'N': The matrix A will be copied to AF and factored. */ +/* > = 'E': The matrix A will be equilibrated if necessary, then */ +/* > copied to AF and factored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is */ +/* > not 'N', then A must have been equilibrated by the scaling */ +/* > factors in R and/or C. A is not modified if FACT = 'F' or */ +/* > 'N', or if FACT = 'E' and EQUED = 'N' on exit. */ +/* > */ +/* > On exit, if EQUED .ne. 'N', A is scaled as follows: */ +/* > EQUED = 'R': A := diag(R) * A */ +/* > EQUED = 'C': A := A * diag(C) */ +/* > EQUED = 'B': A := diag(R) * A * diag(C). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AF */ +/* > \verbatim */ +/* > AF is COMPLEX*16 array, dimension (LDAF,N) */ +/* > If FACT = 'F', then AF is an input argument and on entry */ +/* > contains the factors L and U from the factorization */ +/* > A = P*L*U as computed by ZGETRF. If EQUED .ne. 'N', then */ +/* > AF is the factored form of the equilibrated matrix A. */ +/* > */ +/* > If FACT = 'N', then AF is an output argument and on exit */ +/* > returns the factors L and U from the factorization A = P*L*U */ +/* > of the original matrix A. */ +/* > */ +/* > If FACT = 'E', then AF is an output argument and on exit */ +/* > returns the factors L and U from the factorization A = P*L*U */ +/* > of the equilibrated matrix A (see the description of A for */ +/* > the form of the equilibrated matrix). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > If FACT = 'F', then IPIV is an input argument and on entry */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > as computed by ZGETRF; row i of the matrix was interchanged */ +/* > with row IPIV(i). */ +/* > */ +/* > If FACT = 'N', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > of the original matrix A. */ +/* > */ +/* > If FACT = 'E', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > of the equilibrated matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] EQUED */ +/* > \verbatim */ +/* > EQUED is CHARACTER*1 */ +/* > Specifies the form of equilibration that was done. */ +/* > = 'N': No equilibration (always true if FACT = 'N'). */ +/* > = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* > diag(R). */ +/* > = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* > by diag(C). */ +/* > = 'B': Both row and column equilibration, i.e., A has been */ +/* > replaced by diag(R) * A * diag(C). */ +/* > EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* > output argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] R */ +/* > \verbatim */ +/* > R is DOUBLE PRECISION array, dimension (N) */ +/* > The row scale factors for A. If EQUED = 'R' or 'B', A is */ +/* > multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ +/* > is not accessed. R is an input argument if FACT = 'F'; */ +/* > otherwise, R is an output argument. If FACT = 'F' and */ +/* > EQUED = 'R' or 'B', each element of R must be positive. */ +/* > If R is output, each element of R is a power of the radix. */ +/* > If R is input, each element of R should be a power of the radix */ +/* > to ensure a reliable solution and error estimates. Scaling by */ +/* > powers of the radix does not cause rounding errors unless the */ +/* > result underflows or overflows. Rounding errors during scaling */ +/* > lead to refining with a matrix that is not equivalent to the */ +/* > input matrix, producing error estimates that may not be */ +/* > reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (N) */ +/* > The column scale factors for A. If EQUED = 'C' or 'B', A is */ +/* > multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ +/* > is not accessed. C is an input argument if FACT = 'F'; */ +/* > otherwise, C is an output argument. If FACT = 'F' and */ +/* > EQUED = 'C' or 'B', each element of C must be positive. */ +/* > If C is output, each element of C is a power of the radix. */ +/* > If C is input, each element of C should be a power of the radix */ +/* > to ensure a reliable solution and error estimates. Scaling by */ +/* > powers of the radix does not cause rounding errors unless the */ +/* > result underflows or overflows. Rounding errors during scaling */ +/* > lead to refining with a matrix that is not equivalent to the */ +/* > input matrix, producing error estimates that may not be */ +/* > reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, */ +/* > if EQUED = 'N', B is not modified; */ +/* > if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */ +/* > diag(R)*B; */ +/* > if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */ +/* > overwritten by diag(C)*B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension (LDX,NRHS) */ +/* > If INFO = 0, the N-by-NRHS solution matrix X to the original */ +/* > system of equations. Note that A and B are modified on exit */ +/* > if EQUED .ne. 'N', and the solution to the equilibrated system is */ +/* > inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or */ +/* > inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > Reciprocal scaled condition number. This is an estimate of the */ +/* > reciprocal Skeel condition number of the matrix A after */ +/* > equilibration (if done). If this is less than the machine */ +/* > precision (in particular, if it is zero), the matrix is singular */ +/* > to working precision. Note that the error may still be small even */ +/* > if this number is very small and the matrix appears ill- */ +/* > conditioned. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RPVGRW */ +/* > \verbatim */ +/* > RPVGRW is DOUBLE PRECISION */ +/* > Reciprocal pivot growth. On exit, this contains the reciprocal */ +/* > pivot growth factor norm(A)/norm(U). The "f2cmax absolute element" */ +/* > norm is used. If this is much less than 1, then the stability of */ +/* > the LU factorization of the (equilibrated) matrix A could be poor. */ +/* > This also means that the solution X, estimated condition numbers, */ +/* > and error bounds could be unreliable. If factorization fails with */ +/* > 0 for the leading INFO columns of A. In ZGESVX, this quantity is */ +/* > returned in WORK(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > Componentwise relative backward error. This is the */ +/* > componentwise relative backward error of each solution vector X(j) */ +/* > (i.e., the smallest relative change in any element of A or B that */ +/* > makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N_ERR_BNDS */ +/* > \verbatim */ +/* > N_ERR_BNDS is INTEGER */ +/* > Number of error bounds to return for each right hand side */ +/* > and each type (normwise or componentwise). See ERR_BNDS_NORM and */ +/* > ERR_BNDS_COMP below. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_NORM */ +/* > \verbatim */ +/* > ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > normwise relative error, which is defined as follows: */ +/* > */ +/* > Normwise relative error in the ith solution vector: */ +/* > max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* > ------------------------------ */ +/* > max_j abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the type of error information as described */ +/* > below. There currently are up to three pieces of information */ +/* > returned. */ +/* > */ +/* > The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated normwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*A, where S scales each row by a power of the */ +/* > radix so all absolute row sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_COMP */ +/* > \verbatim */ +/* > ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > componentwise relative error, which is defined as follows: */ +/* > */ +/* > Componentwise relative error in the ith solution vector: */ +/* > abs(XTRUE(j,i) - X(j,i)) */ +/* > max_j ---------------------- */ +/* > abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the right-hand side i (on which the */ +/* > componentwise relative error depends), and the type of error */ +/* > information as described below. There currently are up to three */ +/* > pieces of information returned for each right-hand side. If */ +/* > componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* > ERR_BNDS_COMP is not accessed. If N_ERR_BNDS < 3, then at most */ +/* > the first (:,N_ERR_BNDS) entries are returned. */ +/* > */ +/* > The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated componentwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*(A*diag(x)), where x is the solution for the */ +/* > current right-hand side and S scales each row of */ +/* > A*diag(x) by a power of the radix so all absolute row */ +/* > sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NPARAMS */ +/* > \verbatim */ +/* > NPARAMS is INTEGER */ +/* > Specifies the number of parameters set in PARAMS. If <= 0, the */ +/* > PARAMS array is never referenced and default values are used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] PARAMS */ +/* > \verbatim */ +/* > PARAMS is DOUBLE PRECISION array, dimension NPARAMS */ +/* > Specifies algorithm parameters. If an entry is < 0.0, then */ +/* > that entry will be filled with default value used for that */ +/* > parameter. Only positions up to NPARAMS are accessed; defaults */ +/* > are used for higher-numbered parameters. */ +/* > */ +/* > PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ +/* > refinement or not. */ +/* > Default: 1.0D+0 */ +/* > = 0.0: No refinement is performed, and no error bounds are */ +/* > computed. */ +/* > = 1.0: Use the extra-precise refinement algorithm. */ +/* > (other values are reserved for future use) */ +/* > */ +/* > PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ +/* > computations allowed for refinement. */ +/* > Default: 10 */ +/* > Aggressive: Set to 100 to permit convergence using approximate */ +/* > factorizations or factorizations other than LU. If */ +/* > the factorization uses a technique other than */ +/* > Gaussian elimination, the guarantees in */ +/* > err_bnds_norm and err_bnds_comp may no longer be */ +/* > trustworthy. */ +/* > */ +/* > PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ +/* > will attempt to find a solution with small componentwise */ +/* > relative error in the double-precision algorithm. Positive */ +/* > is true, 0.0 is false. */ +/* > Default: 1.0 (attempt componentwise convergence) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: Successful exit. The solution to every right-hand side is */ +/* > guaranteed. */ +/* > < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly singular, so */ +/* > the solution and error bounds could not be computed. RCOND = 0 */ +/* > is returned. */ +/* > = N+J: The solution corresponding to the Jth right-hand side is */ +/* > not guaranteed. The solutions corresponding to other right- */ +/* > hand sides K with K > J may not be guaranteed as well, but */ +/* > only the first such right-hand side is reported. If a small */ +/* > componentwise error is not requested (PARAMS(3) = 0.0) then */ +/* > the Jth right-hand side is the first with a normwise error */ +/* > bound that is not guaranteed (the smallest J such */ +/* > that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ +/* > the Jth right-hand side is the first with either a normwise or */ +/* > componentwise error bound that is not guaranteed (the smallest */ +/* > J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ +/* > ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ +/* > ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ +/* > about all of the right-hand sides check ERR_BNDS_NORM or */ +/* > ERR_BNDS_COMP. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup complex16GEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int zgesvxx_(char *fact, char *trans, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, + doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, + doublereal *rcond, doublereal *rpvgrw, doublereal *berr, integer * + n_err_bnds__, doublereal *err_bnds_norm__, doublereal * + err_bnds_comp__, integer *nparams, doublereal *params, doublecomplex * + work, doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + doublereal amax; + extern doublereal zla_gerpvgrw_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *); + integer j; + extern logical lsame_(char *, char *); + doublereal rcmin, rcmax; + logical equil; + extern doublereal dlamch_(char *); + doublereal colcnd; + logical nofact; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum; + extern /* Subroutine */ int zlaqge_(integer *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublereal *, doublereal * + , doublereal *, char *); + integer infequ; + logical colequ; + doublereal rowcnd; + logical notran; + extern /* Subroutine */ int zgetrf_(integer *, integer *, doublecomplex *, + integer *, integer *, integer *), zlacpy_(char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *); + doublereal smlnum; + extern /* Subroutine */ int zgetrs_(char *, integer *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + integer *); + logical rowequ; + extern /* Subroutine */ int zlascl2_(integer *, integer *, doublereal *, + doublecomplex *, integer *), zgeequb_(integer *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *), zgerfsx_( + char *, char *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, doublereal *, doublereal *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, doublecomplex *, doublereal *, integer * + ); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ================================================================== */ + + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1; + err_bnds_norm__ -= err_bnds_norm_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + --r__; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --berr; + --params; + --work; + --rwork; + + /* Function Body */ + *info = 0; + nofact = lsame_(fact, "N"); + equil = lsame_(fact, "E"); + notran = lsame_(trans, "N"); + smlnum = dlamch_("Safe minimum"); + bignum = 1. / smlnum; + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rowequ = FALSE_; + colequ = FALSE_; + } else { + rowequ = lsame_(equed, "R") || lsame_(equed, + "B"); + colequ = lsame_(equed, "C") || lsame_(equed, + "B"); + } + +/* Default is failure. If an input parameter is wrong or */ +/* factorization fails, make everything look horrible. Only the */ +/* pivot growth is set here, the rest is initialized in ZGERFSX. */ + + *rpvgrw = 0.; + +/* Test the input parameters. PARAMS is not tested until ZGERFSX. */ + + if (! nofact && ! equil && ! lsame_(fact, "F")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -8; + } else if (lsame_(fact, "F") && ! (rowequ || colequ + || lsame_(equed, "N"))) { + *info = -10; + } else { + if (rowequ) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = r__[j]; + rcmin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = r__[j]; + rcmax = f2cmax(d__1,d__2); +/* L10: */ + } + if (rcmin <= 0.) { + *info = -11; + } else if (*n > 0) { + rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } else { + rowcnd = 1.; + } + } + if (colequ && *info == 0) { + rcmin = bignum; + rcmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = rcmin, d__2 = c__[j]; + rcmin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = rcmax, d__2 = c__[j]; + rcmax = f2cmax(d__1,d__2); +/* L20: */ + } + if (rcmin <= 0.) { + *info = -12; + } else if (*n > 0) { + colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } else { + colcnd = 1.; + } + } + if (*info == 0) { + if (*ldb < f2cmax(1,*n)) { + *info = -14; + } else if (*ldx < f2cmax(1,*n)) { + *info = -16; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGESVXX", &i__1, (ftnlen)7); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + zgeequb_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &colcnd, + &amax, &infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + zlaqge_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, & + colcnd, &amax, equed); + rowequ = lsame_(equed, "R") || lsame_(equed, + "B"); + colequ = lsame_(equed, "C") || lsame_(equed, + "B"); + } + +/* If the scaling factors are not applied, set them to 1.0. */ + + if (! rowequ) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + r__[j] = 1.; + } + } + if (! colequ) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j] = 1.; + } + } + } + +/* Scale the right-hand side. */ + + if (notran) { + if (rowequ) { + zlascl2_(n, nrhs, &r__[1], &b[b_offset], ldb); + } + } else { + if (colequ) { + zlascl2_(n, nrhs, &c__[1], &b[b_offset], ldb); + } + } + + if (nofact || equil) { + +/* Compute the LU factorization of A. */ + + zlacpy_("Full", n, n, &a[a_offset], lda, &af[af_offset], ldaf); + zgetrf_(n, n, &af[af_offset], ldaf, &ipiv[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + +/* Pivot in column INFO is exactly 0 */ +/* Compute the reciprocal pivot growth factor of the */ +/* leading rank-deficient INFO columns of A. */ + + *rpvgrw = zla_gerpvgrw_(n, info, &a[a_offset], lda, &af[ + af_offset], ldaf); + return 0; + } + } + +/* Compute the reciprocal pivot growth factor RPVGRW. */ + + *rpvgrw = zla_gerpvgrw_(n, n, &a[a_offset], lda, &af[af_offset], ldaf); + +/* Compute the solution matrix X. */ + + zlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + zgetrs_(trans, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, + info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + zgerfsx_(trans, equed, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, & + ipiv[1], &r__[1], &c__[1], &b[b_offset], ldb, &x[x_offset], ldx, + rcond, &berr[1], n_err_bnds__, &err_bnds_norm__[ + err_bnds_norm_offset], &err_bnds_comp__[err_bnds_comp_offset], + nparams, ¶ms[1], &work[1], &rwork[1], info); + +/* Scale solutions. */ + + if (colequ && notran) { + zlascl2_(n, nrhs, &c__[1], &x[x_offset], ldx); + } else if (rowequ && ! notran) { + zlascl2_(n, nrhs, &r__[1], &x[x_offset], ldx); + } + + return 0; + +/* End of ZGESVXX */ + +} /* zgesvxx_ */ + diff --git a/lapack-netlib/SRC/zgetc2.c b/lapack-netlib/SRC/zgetc2.c new file mode 100644 index 000000000..2d67f9ad5 --- /dev/null +++ b/lapack-netlib/SRC/zgetc2.c @@ -0,0 +1,655 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGETC2 computes the LU factorization with complete pivoting of the general n-by-n matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGETC2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGETC2( N, A, LDA, IPIV, JPIV, INFO ) */ + +/* INTEGER INFO, LDA, N */ +/* INTEGER IPIV( * ), JPIV( * ) */ +/* COMPLEX*16 A( LDA, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGETC2 computes an LU factorization, using complete pivoting, of the */ +/* > n-by-n matrix A. The factorization has the form A = P * L * U * Q, */ +/* > where P and Q are permutation matrices, L is lower triangular with */ +/* > unit diagonal elements and U is upper triangular. */ +/* > */ +/* > This is a level 1 BLAS version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the n-by-n matrix to be factored. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U*Q; the unit diagonal elements of L are not stored. */ +/* > If U(k, k) appears to be less than SMIN, U(k, k) is given the */ +/* > value of SMIN, giving a nonsingular perturbed system. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1, N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N). */ +/* > The pivot indices; for 1 <= i <= N, row i of the */ +/* > matrix has been interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] JPIV */ +/* > \verbatim */ +/* > JPIV is INTEGER array, dimension (N). */ +/* > The pivot indices; for 1 <= j <= N, column j of the */ +/* > matrix has been interchanged with column JPIV(j). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > > 0: if INFO = k, U(k, k) is likely to produce overflow if */ +/* > one tries to solve for x in Ax = b. So U is perturbed */ +/* > to avoid the overflow. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16GEauxiliary */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ +/* > Umea University, S-901 87 Umea, Sweden. */ + +/* ===================================================================== */ +/* Subroutine */ int zgetc2_(integer *n, doublecomplex *a, integer *lda, + integer *ipiv, integer *jpiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1; + + /* Local variables */ + doublereal smin, xmax; + integer i__, j; + extern /* Subroutine */ int zgeru_(integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zswap_(integer *, doublecomplex *, + integer *, doublecomplex *, integer *), dlabad_(doublereal *, + doublereal *); + extern doublereal dlamch_(char *); + integer ip, jp; + doublereal bignum, smlnum, eps; + integer ipv, jpv; + + +/* -- LAPACK auxiliary routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + --jpiv; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Set constants to control overflow */ + + eps = dlamch_("P"); + smlnum = dlamch_("S") / eps; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Handle the case N=1 by itself */ + + if (*n == 1) { + ipiv[1] = 1; + jpiv[1] = 1; + if (z_abs(&a[a_dim1 + 1]) < smlnum) { + *info = 1; + i__1 = a_dim1 + 1; + z__1.r = smlnum, z__1.i = 0.; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + return 0; + } + +/* Factorize A using complete pivoting. */ +/* Set pivots less than SMIN to SMIN */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Find f2cmax element in matrix A */ + + xmax = 0.; + i__2 = *n; + for (ip = i__; ip <= i__2; ++ip) { + i__3 = *n; + for (jp = i__; jp <= i__3; ++jp) { + if (z_abs(&a[ip + jp * a_dim1]) >= xmax) { + xmax = z_abs(&a[ip + jp * a_dim1]); + ipv = ip; + jpv = jp; + } +/* L10: */ + } +/* L20: */ + } + if (i__ == 1) { +/* Computing MAX */ + d__1 = eps * xmax; + smin = f2cmax(d__1,smlnum); + } + +/* Swap rows */ + + if (ipv != i__) { + zswap_(n, &a[ipv + a_dim1], lda, &a[i__ + a_dim1], lda); + } + ipiv[i__] = ipv; + +/* Swap columns */ + + if (jpv != i__) { + zswap_(n, &a[jpv * a_dim1 + 1], &c__1, &a[i__ * a_dim1 + 1], & + c__1); + } + jpiv[i__] = jpv; + +/* Check for singularity */ + + if (z_abs(&a[i__ + i__ * a_dim1]) < smin) { + *info = i__; + i__2 = i__ + i__ * a_dim1; + z__1.r = smin, z__1.i = 0.; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + i__3 = j + i__ * a_dim1; + z_div(&z__1, &a[j + i__ * a_dim1], &a[i__ + i__ * a_dim1]); + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L30: */ + } + i__2 = *n - i__; + i__3 = *n - i__; + zgeru_(&i__2, &i__3, &c_b10, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[ + i__ + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + (i__ + 1) * + a_dim1], lda); +/* L40: */ + } + + if (z_abs(&a[*n + *n * a_dim1]) < smin) { + *info = *n; + i__1 = *n + *n * a_dim1; + z__1.r = smin, z__1.i = 0.; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + +/* Set last pivots to N */ + + ipiv[*n] = *n; + jpiv[*n] = *n; + + return 0; + +/* End of ZGETC2 */ + +} /* zgetc2_ */ + diff --git a/lapack-netlib/SRC/zgetf2.c b/lapack-netlib/SRC/zgetf2.c new file mode 100644 index 000000000..dc3c825a6 --- /dev/null +++ b/lapack-netlib/SRC/zgetf2.c @@ -0,0 +1,624 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row + interchanges (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGETF2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGETF2( M, N, A, LDA, IPIV, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGETF2 computes an LU factorization of a general m-by-n matrix A */ +/* > using partial pivoting with row interchanges. */ +/* > */ +/* > The factorization has the form */ +/* > A = P * L * U */ +/* > where P is a permutation matrix, L is lower triangular with unit */ +/* > diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* > triangular (upper trapezoidal if m < n). */ +/* > */ +/* > This is the right-looking Level 2 BLAS version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the m by n matrix to be factored. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U; the unit diagonal elements of L are not stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (f2cmin(M,N)) */ +/* > The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > > 0: if INFO = k, U(k,k) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zgetf2_(integer *m, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + integer i__, j; + doublereal sfmin; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zgeru_(integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *), zswap_(integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern doublereal dlamch_(char *); + integer jp; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer izamax_(integer *, doublecomplex *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGETF2", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Compute machine safe minimum */ + + sfmin = dlamch_("S"); + + i__1 = f2cmin(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Find pivot and test for singularity. */ + + i__2 = *m - j + 1; + jp = j - 1 + izamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; + i__2 = jp + j * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + +/* Apply the interchange to columns 1:N. */ + + if (jp != j) { + zswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + } + +/* Compute elements J+1:M of J-th column. */ + + if (j < *m) { + if (z_abs(&a[j + j * a_dim1]) >= sfmin) { + i__2 = *m - j; + z_div(&z__1, &c_b1, &a[j + j * a_dim1]); + zscal_(&i__2, &z__1, &a[j + 1 + j * a_dim1], &c__1); + } else { + i__2 = *m - j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ + j * a_dim1; + z_div(&z__1, &a[j + i__ + j * a_dim1], &a[j + j * + a_dim1]); + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L20: */ + } + } + } + + } else if (*info == 0) { + + *info = j; + } + + if (j < f2cmin(*m,*n)) { + +/* Update trailing submatrix. */ + + i__2 = *m - j; + i__3 = *n - j; + z__1.r = -1., z__1.i = 0.; + zgeru_(&i__2, &i__3, &z__1, &a[j + 1 + j * a_dim1], &c__1, &a[j + + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda) + ; + } +/* L10: */ + } + return 0; + +/* End of ZGETF2 */ + +} /* zgetf2_ */ + diff --git a/lapack-netlib/SRC/zgetrf.c b/lapack-netlib/SRC/zgetrf.c new file mode 100644 index 000000000..2e2d3e1ad --- /dev/null +++ b/lapack-netlib/SRC/zgetrf.c @@ -0,0 +1,645 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGETRF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGETRF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGETRF( M, N, A, LDA, IPIV, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGETRF computes an LU factorization of a general M-by-N matrix A */ +/* > using partial pivoting with row interchanges. */ +/* > */ +/* > The factorization has the form */ +/* > A = P * L * U */ +/* > where P is a permutation matrix, L is lower triangular with unit */ +/* > diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* > triangular (upper trapezoidal if m < n). */ +/* > */ +/* > This is the right-looking Level 3 BLAS version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix to be factored. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U; the unit diagonal elements of L are not stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (f2cmin(M,N)) */ +/* > The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zgetrf_(integer *m, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + + /* Local variables */ + integer i__, j, iinfo; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), ztrsm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer * + , doublecomplex *, integer *); + integer jb, nb; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *, + integer *, integer *, integer *, integer *), zgetrf2_(integer *, + integer *, doublecomplex *, integer *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGETRF", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "ZGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + if (nb <= 1 || nb >= f2cmin(*m,*n)) { + +/* Use unblocked code. */ + + zgetrf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. */ + + i__1 = f2cmin(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = f2cmin(*m,*n) - j + 1; + jb = f2cmin(i__3,nb); + +/* Factor diagonal and subdiagonal blocks and test for exact */ +/* singularity. */ + + i__3 = *m - j + 1; + zgetrf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = f2cmin(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* Apply interchanges to columns 1:J-1. */ + + i__3 = j - 1; + i__4 = j + jb - 1; + zlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + + if (j + jb <= *n) { + +/* Apply interchanges to columns J+JB:N. */ + + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + zlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); + +/* Compute block row of U. */ + + i__3 = *n - j - jb + 1; + ztrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b1, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda); + if (j + jb <= *m) { + +/* Update trailing submatrix. */ + + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + z__1.r = -1., z__1.i = 0.; + zgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, + &z__1, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b1, &a[j + jb + (j + jb) * + a_dim1], lda); + } + } +/* L20: */ + } + } + return 0; + +/* End of ZGETRF */ + +} /* zgetrf_ */ + diff --git a/lapack-netlib/SRC/zgetrf2.c b/lapack-netlib/SRC/zgetrf2.c new file mode 100644 index 000000000..88dec9a1c --- /dev/null +++ b/lapack-netlib/SRC/zgetrf2.c @@ -0,0 +1,690 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGETRF2 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGETRF2( M, N, A, LDA, IPIV, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGETRF2 computes an LU factorization of a general M-by-N matrix A */ +/* > using partial pivoting with row interchanges. */ +/* > */ +/* > The factorization has the form */ +/* > A = P * L * U */ +/* > where P is a permutation matrix, L is lower triangular with unit */ +/* > diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* > triangular (upper trapezoidal if m < n). */ +/* > */ +/* > This is the recursive version of the algorithm. It divides */ +/* > the matrix into four submatrices: */ +/* > */ +/* > [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 */ +/* > A = [ -----|----- ] with n1 = f2cmin(m,n)/2 */ +/* > [ A21 | A22 ] n2 = n-n1 */ +/* > */ +/* > [ A11 ] */ +/* > The subroutine calls itself to factor [ --- ], */ +/* > [ A12 ] */ +/* > [ A12 ] */ +/* > do the swaps on [ --- ], solve A12, update A22, */ +/* > [ A22 ] */ +/* > */ +/* > then calls itself to factor A22 and do the swaps on A21. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix to be factored. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U; the unit diagonal elements of L are not stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (f2cmin(M,N)) */ +/* > The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16GEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zgetrf2_(integer *m, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublecomplex z__1; + + /* Local variables */ + doublecomplex temp; + integer i__, iinfo; + doublereal sfmin; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zgemm_(char *, char *, integer *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + integer n1, n2; + extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *); + extern doublereal dlamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer izamax_(integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *, + integer *, integer *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGETRF2", &i__1, (ftnlen)7); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + if (*m == 1) { + +/* Use unblocked code for one row case */ +/* Just need to handle IPIV and INFO */ + + ipiv[1] = 1; + i__1 = a_dim1 + 1; + if (a[i__1].r == 0. && a[i__1].i == 0.) { + *info = 1; + } + + } else if (*n == 1) { + +/* Use unblocked code for one column case */ + + +/* Compute machine safe minimum */ + + sfmin = dlamch_("S"); + +/* Find pivot and test for singularity */ + + i__ = izamax_(m, &a[a_dim1 + 1], &c__1); + ipiv[1] = i__; + i__1 = i__ + a_dim1; + if (a[i__1].r != 0. || a[i__1].i != 0.) { + +/* Apply the interchange */ + + if (i__ != 1) { + i__1 = a_dim1 + 1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = a_dim1 + 1; + i__2 = i__ + a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = i__ + a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + } + +/* Compute elements 2:M of the column */ + + if (z_abs(&a[a_dim1 + 1]) >= sfmin) { + i__1 = *m - 1; + z_div(&z__1, &c_b1, &a[a_dim1 + 1]); + zscal_(&i__1, &z__1, &a[a_dim1 + 2], &c__1); + } else { + i__1 = *m - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + 1 + a_dim1; + z_div(&z__1, &a[i__ + 1 + a_dim1], &a[a_dim1 + 1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L10: */ + } + } + + } else { + *info = 1; + } + } else { + +/* Use recursive code */ + + n1 = f2cmin(*m,*n) / 2; + n2 = *n - n1; + +/* [ A11 ] */ +/* Factor [ --- ] */ +/* [ A21 ] */ + + zgetrf2_(m, &n1, &a[a_offset], lda, &ipiv[1], &iinfo); + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + +/* [ A12 ] */ +/* Apply interchanges to [ --- ] */ +/* [ A22 ] */ + + zlaswp_(&n2, &a[(n1 + 1) * a_dim1 + 1], lda, &c__1, &n1, &ipiv[1], & + c__1); + +/* Solve A12 */ + + ztrsm_("L", "L", "N", "U", &n1, &n2, &c_b1, &a[a_offset], lda, &a[(n1 + + 1) * a_dim1 + 1], lda); + +/* Update A22 */ + + i__1 = *m - n1; + z__1.r = -1., z__1.i = 0.; + zgemm_("N", "N", &i__1, &n2, &n1, &z__1, &a[n1 + 1 + a_dim1], lda, &a[ + (n1 + 1) * a_dim1 + 1], lda, &c_b1, &a[n1 + 1 + (n1 + 1) * + a_dim1], lda); + +/* Factor A22 */ + + i__1 = *m - n1; + zgetrf2_(&i__1, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &ipiv[n1 + + 1], &iinfo); + +/* Adjust INFO and the pivot indices */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + n1; + } + i__1 = f2cmin(*m,*n); + for (i__ = n1 + 1; i__ <= i__1; ++i__) { + ipiv[i__] += n1; +/* L20: */ + } + +/* Apply interchanges to A21 */ + + i__1 = n1 + 1; + i__2 = f2cmin(*m,*n); + zlaswp_(&n1, &a[a_dim1 + 1], lda, &i__1, &i__2, &ipiv[1], &c__1); + + } + return 0; + +/* End of ZGETRF2 */ + +} /* zgetrf2_ */ + diff --git a/lapack-netlib/SRC/zgetri.c b/lapack-netlib/SRC/zgetri.c new file mode 100644 index 000000000..61b5d65e9 --- /dev/null +++ b/lapack-netlib/SRC/zgetri.c @@ -0,0 +1,700 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGETRI */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGETRI + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LWORK, N */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGETRI computes the inverse of a matrix using the LU factorization */ +/* > computed by ZGETRF. */ +/* > */ +/* > This method inverts U and then computes inv(A) by solving the system */ +/* > inv(A)*L = inv(U) for inv(A). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the factors L and U from the factorization */ +/* > A = P*L*U as computed by ZGETRF. */ +/* > On exit, if INFO = 0, the inverse of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from ZGETRF; for 1<=i<=N, row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO=0, then WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N). */ +/* > For optimal performance LWORK >= N*NB, where NB is */ +/* > the optimal blocksize returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero; the matrix is */ +/* > singular and its inverse could not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zgetri_(integer *n, doublecomplex *a, integer *lda, + integer *ipiv, doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + + /* Local variables */ + integer i__, j, nbmin; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), + zswap_(integer *, doublecomplex *, integer *, doublecomplex *, + integer *), ztrsm_(char *, char *, char *, char *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *); + integer jb, nb, jj, jp, nn; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + extern /* Subroutine */ int ztrtri_(char *, char *, integer *, + doublecomplex *, integer *, integer *); + integer iws; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "ZGETRI", " ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + lwkopt = *n * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*lda < f2cmax(1,*n)) { + *info = -3; + } else if (*lwork < f2cmax(1,*n) && ! lquery) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGETRI", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form inv(U). If INFO > 0 from ZTRTRI, then U is singular, */ +/* and the inverse is not computed. */ + + ztrtri_("Upper", "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; + } + + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { +/* Computing MAX */ + i__1 = ldwork * nb; + iws = f2cmax(i__1,1); + if (*lwork < iws) { + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "ZGETRI", " ", n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + } + } else { + iws = *n; + } + +/* Solve the equation inv(A)*L = inv(U) for inv(A). */ + + if (nb < nbmin || nb >= *n) { + +/* Use unblocked code. */ + + for (j = *n; j >= 1; --j) { + +/* Copy current column of L to WORK and replace with zeros. */ + + i__1 = *n; + for (i__ = j + 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__ + j * a_dim1; + work[i__2].r = a[i__3].r, work[i__2].i = a[i__3].i; + i__2 = i__ + j * a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L10: */ + } + +/* Compute current column of inv(A). */ + + if (j < *n) { + i__1 = *n - j; + z__1.r = -1., z__1.i = 0.; + zgemv_("No transpose", n, &i__1, &z__1, &a[(j + 1) * a_dim1 + + 1], lda, &work[j + 1], &c__1, &c_b2, &a[j * a_dim1 + + 1], &c__1); + } +/* L20: */ + } + } else { + +/* Use blocked code. */ + + nn = (*n - 1) / nb * nb + 1; + i__1 = -nb; + for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *n - j + 1; + jb = f2cmin(i__2,i__3); + +/* Copy current block column of L to WORK and replace with */ +/* zeros. */ + + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = *n; + for (i__ = jj + 1; i__ <= i__3; ++i__) { + i__4 = i__ + (jj - j) * ldwork; + i__5 = i__ + jj * a_dim1; + work[i__4].r = a[i__5].r, work[i__4].i = a[i__5].i; + i__4 = i__ + jj * a_dim1; + a[i__4].r = 0., a[i__4].i = 0.; +/* L30: */ + } +/* L40: */ + } + +/* Compute current block column of inv(A). */ + + if (j + jb <= *n) { + i__2 = *n - j - jb + 1; + z__1.r = -1., z__1.i = 0.; + zgemm_("No transpose", "No transpose", n, &jb, &i__2, &z__1, & + a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], &ldwork, + &c_b2, &a[j * a_dim1 + 1], lda); + } + ztrsm_("Right", "Lower", "No transpose", "Unit", n, &jb, &c_b2, & + work[j], &ldwork, &a[j * a_dim1 + 1], lda); +/* L50: */ + } + } + +/* Apply column interchanges. */ + + for (j = *n - 1; j >= 1; --j) { + jp = ipiv[j]; + if (jp != j) { + zswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1); + } +/* L60: */ + } + + work[1].r = (doublereal) iws, work[1].i = 0.; + return 0; + +/* End of ZGETRI */ + +} /* zgetri_ */ + diff --git a/lapack-netlib/SRC/zgetrs.c b/lapack-netlib/SRC/zgetrs.c new file mode 100644 index 000000000..572a380d2 --- /dev/null +++ b/lapack-netlib/SRC/zgetrs.c @@ -0,0 +1,621 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGETRS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGETRS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGETRS solves a system of linear equations */ +/* > A * X = B, A**T * X = B, or A**H * X = B */ +/* > with a general N-by-N matrix A using the LU factorization computed */ +/* > by ZGETRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > The factors L and U from the factorization A = P*L*U */ +/* > as computed by ZGETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from ZGETRF; for 1<=i<=N, row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the right hand side matrix B. */ +/* > On exit, the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zgetrs_(char *trans, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, + integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + xerbla_(char *, integer *, ftnlen); + logical notran; + extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *, + integer *, integer *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGETRS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (notran) { + +/* Solve A * X = B. */ + +/* Apply row interchanges to the right hand sides. */ + + zlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); + +/* Solve L*X = B, overwriting B with X. */ + + ztrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b1, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b1, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* Solve A**T * X = B or A**H * X = B. */ + +/* Solve U**T *X = B or U**H *X = B, overwriting B with X. */ + + ztrsm_("Left", "Upper", trans, "Non-unit", n, nrhs, &c_b1, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve L**T *X = B, or L**H *X = B overwriting B with X. */ + + ztrsm_("Left", "Lower", trans, "Unit", n, nrhs, &c_b1, &a[a_offset], + lda, &b[b_offset], ldb); + +/* Apply row interchanges to the solution vectors. */ + + zlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + } + + return 0; + +/* End of ZGETRS */ + +} /* zgetrs_ */ + diff --git a/lapack-netlib/SRC/zgetsls.c b/lapack-netlib/SRC/zgetsls.c new file mode 100644 index 000000000..6ec2b8cf5 --- /dev/null +++ b/lapack-netlib/SRC/zgetsls.c @@ -0,0 +1,944 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGETSLS */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, */ +/* $ WORK, LWORK, INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGETSLS solves overdetermined or underdetermined complex linear systems */ +/* > involving an M-by-N matrix A, using a tall skinny QR or short wide LQ */ +/* > factorization of A. It is assumed that A has full rank. */ +/* > */ +/* > */ +/* > */ +/* > The following options are provided: */ +/* > */ +/* > 1. If TRANS = 'N' and m >= n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A*X ||. */ +/* > */ +/* > 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ +/* > an underdetermined system A * X = B. */ +/* > */ +/* > 3. If TRANS = 'C' and m >= n: find the minimum norm solution of */ +/* > an undetermined system A**T * X = B. */ +/* > */ +/* > 4. If TRANS = 'C' and m < n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A**T * X ||. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': the linear system involves A; */ +/* > = 'C': the linear system involves A**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of */ +/* > columns of the matrices B and X. NRHS >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > A is overwritten by details of its QR or LQ */ +/* > factorization as returned by ZGEQR or ZGELQ. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the matrix B of right hand side vectors, stored */ +/* > columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ +/* > if TRANS = 'C'. */ +/* > On exit, if INFO = 0, B is overwritten by the solution */ +/* > vectors, stored columnwise: */ +/* > if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ +/* > squares solution vectors. */ +/* > if TRANS = 'N' and m < n, rows 1 to N of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'C' and m >= n, rows 1 to M of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'C' and m < n, rows 1 to M of B contain the */ +/* > least squares solution vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= MAX(1,M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */ +/* > or optimal, if query was assumed) LWORK. */ +/* > See LWORK for details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If LWORK = -1 or -2, then a workspace query is assumed. */ +/* > If LWORK = -1, the routine calculates optimal size of WORK for the */ +/* > optimal performance and returns this value in WORK(1). */ +/* > If LWORK = -2, the routine calculates minimal size of WORK and */ +/* > returns this value in WORK(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the i-th diagonal element of the */ +/* > triangular factor of A is zero, so that A does not have */ +/* > full rank; the least squares solution could not be */ +/* > computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup complex16GEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int zgetsls_(char *trans, integer *m, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + real r__1; + doublereal d__1; + + /* Local variables */ + doublereal anrm, bnrm; + logical tran; + integer brow, tszm, tszo, info2, i__, j, iascl, ibscl; + extern logical lsame_(char *, char *); + integer minmn, maxmn; + extern /* Subroutine */ int zgelq_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + integer *), zgeqr_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + integer *); + doublecomplex workq[1]; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *); + doublecomplex tq[5]; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer scllen; + doublereal bignum; + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + extern /* Subroutine */ int zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex *, + integer *, integer *), zgemlq_(char *, char *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex * + , integer *, doublecomplex *, integer *, doublecomplex *, integer + *, integer *), zlaset_(char *, integer *, integer + *, doublecomplex *, doublecomplex *, doublecomplex *, integer *), zgemqr_(char *, char *, integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + doublereal smlnum; + integer wsizem, wsizeo; + logical lquery; + integer lw1, lw2; + extern /* Subroutine */ int ztrtrs_(char *, char *, char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + integer *); + integer mnk; + doublereal dum[1]; + integer lwm, lwo; + + +/* -- LAPACK driver routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + minmn = f2cmin(*m,*n); + maxmn = f2cmax(*m,*n); + mnk = f2cmax(minmn,*nrhs); + tran = lsame_(trans, "C"); + + lquery = *lwork == -1 || *lwork == -2; + if (! (lsame_(trans, "N") || lsame_(trans, "C"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*m)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*ldb < f2cmax(i__1,*n)) { + *info = -8; + } + } + + if (*info == 0) { + +/* Determine the block size and minimum LWORK */ + + if (*m >= *n) { + zgeqr_(m, n, &a[a_offset], lda, tq, &c_n1, workq, &c_n1, &info2); + tszo = (integer) tq[0].r; + lwo = (integer) workq[0].r; + zgemqr_("L", trans, m, nrhs, n, &a[a_offset], lda, tq, &tszo, &b[ + b_offset], ldb, workq, &c_n1, &info2); +/* Computing MAX */ + i__1 = lwo, i__2 = (integer) workq[0].r; + lwo = f2cmax(i__1,i__2); + zgeqr_(m, n, &a[a_offset], lda, tq, &c_n2, workq, &c_n2, &info2); + tszm = (integer) tq[0].r; + lwm = (integer) workq[0].r; + zgemqr_("L", trans, m, nrhs, n, &a[a_offset], lda, tq, &tszm, &b[ + b_offset], ldb, workq, &c_n1, &info2); +/* Computing MAX */ + i__1 = lwm, i__2 = (integer) workq[0].r; + lwm = f2cmax(i__1,i__2); + wsizeo = tszo + lwo; + wsizem = tszm + lwm; + } else { + zgelq_(m, n, &a[a_offset], lda, tq, &c_n1, workq, &c_n1, &info2); + tszo = (integer) tq[0].r; + lwo = (integer) workq[0].r; + zgemlq_("L", trans, n, nrhs, m, &a[a_offset], lda, tq, &tszo, &b[ + b_offset], ldb, workq, &c_n1, &info2); +/* Computing MAX */ + i__1 = lwo, i__2 = (integer) workq[0].r; + lwo = f2cmax(i__1,i__2); + zgelq_(m, n, &a[a_offset], lda, tq, &c_n2, workq, &c_n2, &info2); + tszm = (integer) tq[0].r; + lwm = (integer) workq[0].r; + zgemlq_("L", trans, n, nrhs, m, &a[a_offset], lda, tq, &tszm, &b[ + b_offset], ldb, workq, &c_n1, &info2); +/* Computing MAX */ + i__1 = lwm, i__2 = (integer) workq[0].r; + lwm = f2cmax(i__1,i__2); + wsizeo = tszo + lwo; + wsizem = tszm + lwm; + } + + if (*lwork < wsizem && ! lquery) { + *info = -10; + } + + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGETSLS", &i__1, (ftnlen)7); + d__1 = (doublereal) wsizeo; + work[1].r = d__1, work[1].i = 0.; + return 0; + } + if (lquery) { + if (*lwork == -1) { + r__1 = (real) wsizeo; + work[1].r = r__1, work[1].i = 0.f; + } + if (*lwork == -2) { + r__1 = (real) wsizem; + work[1].r = r__1, work[1].i = 0.f; + } + return 0; + } + if (*lwork < wsizeo) { + lw1 = tszm; + lw2 = lwm; + } else { + lw1 = tszo; + lw2 = lwo; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*nrhs) == 0) { + i__1 = f2cmax(*m,*n); + zlaset_("FULL", &i__1, nrhs, &c_b1, &c_b1, &b[b_offset], ldb); + return 0; + } + +/* Get machine parameters */ + + smlnum = dlamch_("S") / dlamch_("P"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = zlange_("M", m, n, &a[a_offset], lda, dum); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + zlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + zlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + zlaset_("F", &maxmn, nrhs, &c_b1, &c_b1, &b[b_offset], ldb) + ; + goto L50; + } + + brow = *m; + if (tran) { + brow = *n; + } + bnrm = zlange_("M", &brow, nrhs, &b[b_offset], ldb, dum); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + zlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + zlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 2; + } + + if (*m >= *n) { + +/* compute QR factorization of A */ + + zgeqr_(m, n, &a[a_offset], lda, &work[lw2 + 1], &lw1, &work[1], &lw2, + info); + if (! tran) { + +/* Least-Squares Problem f2cmin || A * X - B || */ + +/* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */ + + zgemqr_("L", "C", m, nrhs, n, &a[a_offset], lda, &work[lw2 + 1], & + lw1, &b[b_offset], ldb, &work[1], &lw2, info); + +/* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ + + ztrtrs_("U", "N", "N", n, nrhs, &a[a_offset], lda, &b[b_offset], + ldb, info); + if (*info > 0) { + return 0; + } + scllen = *n; + } else { + +/* Overdetermined system of equations A**T * X = B */ + +/* B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */ + + ztrtrs_("U", "C", "N", n, nrhs, &a[a_offset], lda, &b[b_offset], + ldb, info); + + if (*info > 0) { + return 0; + } + +/* B(N+1:M,1:NRHS) = CZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *n + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + +/* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */ + + zgemqr_("L", "N", m, nrhs, n, &a[a_offset], lda, &work[lw2 + 1], & + lw1, &b[b_offset], ldb, &work[1], &lw2, info); + + scllen = *m; + + } + + } else { + +/* Compute LQ factorization of A */ + + zgelq_(m, n, &a[a_offset], lda, &work[lw2 + 1], &lw1, &work[1], &lw2, + info); + +/* workspace at least M, optimally M*NB. */ + + if (! tran) { + +/* underdetermined system of equations A * X = B */ + +/* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ + + ztrtrs_("L", "N", "N", m, nrhs, &a[a_offset], lda, &b[b_offset], + ldb, info); + + if (*info > 0) { + return 0; + } + +/* B(M+1:N,1:NRHS) = 0 */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *m + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; +/* L30: */ + } +/* L40: */ + } + +/* B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) */ + + zgemlq_("L", "C", n, nrhs, m, &a[a_offset], lda, &work[lw2 + 1], & + lw1, &b[b_offset], ldb, &work[1], &lw2, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + + scllen = *n; + + } else { + +/* overdetermined system f2cmin || A**T * X - B || */ + +/* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */ + + zgemlq_("L", "N", n, nrhs, m, &a[a_offset], lda, &work[lw2 + 1], & + lw1, &b[b_offset], ldb, &work[1], &lw2, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + +/* B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */ + + ztrtrs_("L", "C", "N", m, nrhs, &a[a_offset], lda, &b[b_offset], + ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *m; + + } + + } + +/* Undo scaling */ + + if (iascl == 1) { + zlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (iascl == 2) { + zlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + if (ibscl == 1) { + zlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (ibscl == 2) { + zlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + +L50: + d__1 = (doublereal) (tszo + lwo); + work[1].r = d__1, work[1].i = 0.; + return 0; + +/* End of ZGETSLS */ + +} /* zgetsls_ */ + diff --git a/lapack-netlib/SRC/zgetsqrhrt.c b/lapack-netlib/SRC/zgetsqrhrt.c new file mode 100644 index 000000000..95fd257ce --- /dev/null +++ b/lapack-netlib/SRC/zgetsqrhrt.c @@ -0,0 +1,779 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGETSQRHRT */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGETSQRHRT + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK, */ +/* $ LWORK, INFO ) */ +/* IMPLICIT NONE */ + +/* INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1 */ +/* COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGETSQRHRT computes a NB2-sized column blocked QR-factorization */ +/* > of a complex M-by-N matrix A with M >= N, */ +/* > */ +/* > A = Q * R. */ +/* > */ +/* > The routine uses internally a NB1-sized column blocked and MB1-sized */ +/* > row blocked TSQR-factorization and perfors the reconstruction */ +/* > of the Householder vectors from the TSQR output. The routine also */ +/* > converts the R_tsqr factor from the TSQR-factorization output into */ +/* > the R factor that corresponds to the Householder QR-factorization, */ +/* > */ +/* > A = Q_tsqr * R_tsqr = Q * R. */ +/* > */ +/* > The output Q and R factors are stored in the same format as in ZGEQRT */ +/* > (Q is in blocked compact WY-representation). See the documentation */ +/* > of ZGEQRT for more details on the format. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MB1 */ +/* > \verbatim */ +/* > MB1 is INTEGER */ +/* > The row block size to be used in the blocked TSQR. */ +/* > MB1 > N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NB1 */ +/* > \verbatim */ +/* > NB1 is INTEGER */ +/* > The column block size to be used in the blocked TSQR. */ +/* > N >= NB1 >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NB2 */ +/* > \verbatim */ +/* > NB2 is INTEGER */ +/* > The block size to be used in the blocked QR that is */ +/* > output. NB2 >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > */ +/* > On entry: an M-by-N matrix A. */ +/* > */ +/* > On exit: */ +/* > a) the elements on and above the diagonal */ +/* > of the array contain the N-by-N upper-triangular */ +/* > matrix R corresponding to the Householder QR; */ +/* > b) the elements below the diagonal represent Q by */ +/* > the columns of blocked V (compact WY-representation). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is COMPLEX*16 array, dimension (LDT,N)) */ +/* > The upper triangular block reflectors stored in compact form */ +/* > as a sequence of upper triangular blocks. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= NB2. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > The dimension of the array WORK. */ +/* > LWORK >= MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ), */ +/* > where */ +/* > NUM_ALL_ROW_BLOCKS = CEIL((M-N)/(MB1-N)), */ +/* > NB1LOCAL = MIN(NB1,N). */ +/* > LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL, */ +/* > LW1 = NB1LOCAL * N, */ +/* > LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ), */ +/* > If LWORK = -1, then a workspace query is assumed. */ +/* > The routine only calculates the optimal size of the WORK */ +/* > array, returns this value as the first entry of the WORK */ +/* > array, and no error message related to LWORK is issued */ +/* > by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup comlpex16OTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > November 2020, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zgetsqrhrt_(integer *m, integer *n, integer *mb1, + integer *nb1, integer *nb2, doublecomplex *a, integer *lda, + doublecomplex *t, integer *ldt, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3; + doublecomplex z__1, z__2; + + /* Local variables */ + integer ldwt, lworkopt, i__, j, iinfo; + extern /* Subroutine */ int zungtsqr_row_(integer *, integer *, integer * + , integer *, doublecomplex *, integer *, doublecomplex *, integer + *, doublecomplex *, integer *, integer *), zcopy_(integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zunhr_col_(integer *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *) + , xerbla_(char *, integer *, ftnlen); + logical lquery; + integer lw1, lw2, num_all_row_blocks__, lwt, nb1local, nb2local; + extern /* Subroutine */ int zlatsqr_(integer *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *m < *n) { + *info = -2; + } else if (*mb1 <= *n) { + *info = -3; + } else if (*nb1 < 1) { + *info = -4; + } else if (*nb2 < 1) { + *info = -5; + } else if (*lda < f2cmax(1,*m)) { + *info = -7; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = f2cmin(*nb2,*n); + if (*ldt < f2cmax(i__1,i__2)) { + *info = -9; + } else { + +/* Test the input LWORK for the dimension of the array WORK. */ +/* This workspace is used to store array: */ +/* a) Matrix T and WORK for ZLATSQR; */ +/* b) N-by-N upper-triangular factor R_tsqr; */ +/* c) Matrix T and array WORK for ZUNGTSQR_ROW; */ +/* d) Diagonal D for ZUNHR_COL. */ + + if (*lwork < *n * *n + 1 && ! lquery) { + *info = -11; + } else { + +/* Set block size for column blocks */ + + nb1local = f2cmin(*nb1,*n); + +/* Computing MAX */ + d__3 = (doublereal) (*m - *n) / (doublereal) (*mb1 - *n) + + .5f; + d__1 = 1., d__2 = d_int(&d__3); + num_all_row_blocks__ = (integer) f2cmax(d__1,d__2); + +/* Length and leading dimension of WORK array to place */ +/* T array in TSQR. */ + + lwt = num_all_row_blocks__ * *n * nb1local; + ldwt = nb1local; + +/* Length of TSQR work array */ + + lw1 = nb1local * *n; + +/* Length of ZUNGTSQR_ROW work array. */ + +/* Computing MAX */ + i__1 = nb1local, i__2 = *n - nb1local; + lw2 = nb1local * f2cmax(i__1,i__2); + +/* Computing MAX */ +/* Computing MAX */ + i__3 = lwt + *n * *n + lw2, i__4 = lwt + *n * *n + *n; + i__1 = lwt + lw1, i__2 = f2cmax(i__3,i__4); + lworkopt = f2cmax(i__1,i__2); + + if (*lwork < f2cmax(1,lworkopt) && ! lquery) { + *info = -11; + } + + } + } + } + +/* Handle error in the input parameters and return workspace query. */ + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGETSQRHRT", &i__1, (ftnlen)10); + return 0; + } else if (lquery) { + z__1.r = (doublereal) lworkopt, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; + } + +/* Quick return if possible */ + + if (f2cmin(*m,*n) == 0) { + z__1.r = (doublereal) lworkopt, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; + } + + nb2local = f2cmin(*nb2,*n); + + +/* (1) Perform TSQR-factorization of the M-by-N matrix A. */ + + zlatsqr_(m, n, mb1, &nb1local, &a[a_offset], lda, &work[1], &ldwt, &work[ + lwt + 1], &lw1, &iinfo); + +/* (2) Copy the factor R_tsqr stored in the upper-triangular part */ +/* of A into the square matrix in the work array */ +/* WORK(LWT+1:LWT+N*N) column-by-column. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + zcopy_(&j, &a[j * a_dim1 + 1], &c__1, &work[lwt + *n * (j - 1) + 1], & + c__1); + } + +/* (3) Generate a M-by-N matrix Q with orthonormal columns from */ +/* the result stored below the diagonal in the array A in place. */ + + zungtsqr_row_(m, n, mb1, &nb1local, &a[a_offset], lda, &work[1], &ldwt, & + work[lwt + *n * *n + 1], &lw2, &iinfo); + +/* (4) Perform the reconstruction of Householder vectors from */ +/* the matrix Q (stored in A) in place. */ + + zunhr_col_(m, n, &nb2local, &a[a_offset], lda, &t[t_offset], ldt, &work[ + lwt + *n * *n + 1], &iinfo); + +/* (5) Copy the factor R_tsqr stored in the square matrix in the */ +/* work array WORK(LWT+1:LWT+N*N) into the upper-triangular */ +/* part of A. */ + +/* (6) Compute from R_tsqr the factor R_hr corresponding to */ +/* the reconstructed Householder vectors, i.e. R_hr = S * R_tsqr. */ +/* This multiplication by the sign matrix S on the left means */ +/* changing the sign of I-th row of the matrix R_tsqr according */ +/* to sign of the I-th diagonal element DIAG(I) of the matrix S. */ +/* DIAG is stored in WORK( LWT+N*N+1 ) from the ZUNHR_COL output. */ + +/* (5) and (6) can be combined in a single loop, so the rows in A */ +/* are accessed only once. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = lwt + *n * *n + i__; + z__1.r = -1., z__1.i = 0.; + if (work[i__2].r == z__1.r && work[i__2].i == z__1.i) { + i__2 = *n; + for (j = i__; j <= i__2; ++j) { + i__3 = i__ + j * a_dim1; + z__2.r = -1., z__2.i = 0.; + i__4 = lwt + *n * (j - 1) + i__; + z__1.r = z__2.r * work[i__4].r - z__2.i * work[i__4].i, + z__1.i = z__2.r * work[i__4].i + z__2.i * work[i__4] + .r; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + } else { + i__2 = *n - i__ + 1; + zcopy_(&i__2, &work[lwt + *n * (i__ - 1) + i__], n, &a[i__ + i__ * + a_dim1], lda); + } + } + + z__1.r = (doublereal) lworkopt, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; + +/* End of ZGETSQRHRT */ + +} /* zgetsqrhrt_ */ + diff --git a/lapack-netlib/SRC/zggbak.c b/lapack-netlib/SRC/zggbak.c new file mode 100644 index 000000000..d9daaf4ae --- /dev/null +++ b/lapack-netlib/SRC/zggbak.c @@ -0,0 +1,720 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGGBAK */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGGBAK + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, */ +/* LDV, INFO ) */ + +/* CHARACTER JOB, SIDE */ +/* INTEGER IHI, ILO, INFO, LDV, M, N */ +/* DOUBLE PRECISION LSCALE( * ), RSCALE( * ) */ +/* COMPLEX*16 V( LDV, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGGBAK forms the right or left eigenvectors of a complex generalized */ +/* > eigenvalue problem A*x = lambda*B*x, by backward transformation on */ +/* > the computed eigenvectors of the balanced pair of matrices output by */ +/* > ZGGBAL. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOB */ +/* > \verbatim */ +/* > JOB is CHARACTER*1 */ +/* > Specifies the type of backward transformation required: */ +/* > = 'N': do nothing, return immediately; */ +/* > = 'P': do backward transformation for permutation only; */ +/* > = 'S': do backward transformation for scaling only; */ +/* > = 'B': do backward transformations for both permutation and */ +/* > scaling. */ +/* > JOB must be the same as the argument JOB supplied to ZGGBAL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'R': V contains right eigenvectors; */ +/* > = 'L': V contains left eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of rows of the matrix V. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > The integers ILO and IHI determined by ZGGBAL. */ +/* > 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LSCALE */ +/* > \verbatim */ +/* > LSCALE is DOUBLE PRECISION array, dimension (N) */ +/* > Details of the permutations and/or scaling factors applied */ +/* > to the left side of A and B, as returned by ZGGBAL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RSCALE */ +/* > \verbatim */ +/* > RSCALE is DOUBLE PRECISION array, dimension (N) */ +/* > Details of the permutations and/or scaling factors applied */ +/* > to the right side of A and B, as returned by ZGGBAL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of columns of the matrix V. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] V */ +/* > \verbatim */ +/* > V is COMPLEX*16 array, dimension (LDV,M) */ +/* > On entry, the matrix of right or left eigenvectors to be */ +/* > transformed, as returned by ZTGEVC. */ +/* > On exit, V is overwritten by the transformed eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the matrix V. LDV >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GBcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > See R.C. Ward, Balancing the generalized eigenvalue problem, */ +/* > SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zggbak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *lscale, doublereal *rscale, integer *m, + doublecomplex *v, integer *ldv, integer *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, i__1; + + /* Local variables */ + integer i__, k; + extern logical lsame_(char *, char *); + logical leftv; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), xerbla_(char *, integer *, ftnlen), + zdscal_(integer *, doublereal *, doublecomplex *, integer *); + logical rightv; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + --lscale; + --rscale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + + /* Function Body */ + rightv = lsame_(side, "R"); + leftv = lsame_(side, "L"); + + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (! rightv && ! leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1) { + *info = -4; + } else if (*n == 0 && *ihi == 0 && *ilo != 1) { + *info = -4; + } else if (*n > 0 && (*ihi < *ilo || *ihi > f2cmax(1,*n))) { + *info = -5; + } else if (*n == 0 && *ilo == 1 && *ihi != 0) { + *info = -5; + } else if (*m < 0) { + *info = -8; + } else if (*ldv < f2cmax(1,*n)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGGBAK", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, "N")) { + return 0; + } + + if (*ilo == *ihi) { + goto L30; + } + +/* Backward balance */ + + if (lsame_(job, "S") || lsame_(job, "B")) { + +/* Backward transformation on right eigenvectors */ + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + zdscal_(m, &rscale[i__], &v[i__ + v_dim1], ldv); +/* L10: */ + } + } + +/* Backward transformation on left eigenvectors */ + + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + zdscal_(m, &lscale[i__], &v[i__ + v_dim1], ldv); +/* L20: */ + } + } + } + +/* Backward permutation */ + +L30: + if (lsame_(job, "P") || lsame_(job, "B")) { + +/* Backward permutation on right eigenvectors */ + + if (rightv) { + if (*ilo == 1) { + goto L50; + } + for (i__ = *ilo - 1; i__ >= 1; --i__) { + k = (integer) rscale[i__]; + if (k == i__) { + goto L40; + } + zswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } + +L50: + if (*ihi == *n) { + goto L70; + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + k = (integer) rscale[i__]; + if (k == i__) { + goto L60; + } + zswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L60: + ; + } + } + +/* Backward permutation on left eigenvectors */ + +L70: + if (leftv) { + if (*ilo == 1) { + goto L90; + } + for (i__ = *ilo - 1; i__ >= 1; --i__) { + k = (integer) lscale[i__]; + if (k == i__) { + goto L80; + } + zswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L80: + ; + } + +L90: + if (*ihi == *n) { + goto L110; + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + k = (integer) lscale[i__]; + if (k == i__) { + goto L100; + } + zswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L100: + ; + } + } + } + +L110: + + return 0; + +/* End of ZGGBAK */ + +} /* zggbak_ */ + diff --git a/lapack-netlib/SRC/zggbal.c b/lapack-netlib/SRC/zggbal.c new file mode 100644 index 000000000..6f60ff271 --- /dev/null +++ b/lapack-netlib/SRC/zggbal.c @@ -0,0 +1,1097 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGGBAL */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGGBAL + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, */ +/* RSCALE, WORK, INFO ) */ + +/* CHARACTER JOB */ +/* INTEGER IHI, ILO, INFO, LDA, LDB, N */ +/* DOUBLE PRECISION LSCALE( * ), RSCALE( * ), WORK( * ) */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGGBAL balances a pair of general complex matrices (A,B). This */ +/* > involves, first, permuting A and B by similarity transformations to */ +/* > isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N */ +/* > elements on the diagonal; and second, applying a diagonal similarity */ +/* > transformation to rows and columns ILO to IHI to make the rows */ +/* > and columns as close in norm as possible. Both steps are optional. */ +/* > */ +/* > Balancing may reduce the 1-norm of the matrices, and improve the */ +/* > accuracy of the computed eigenvalues and/or eigenvectors in the */ +/* > generalized eigenvalue problem A*x = lambda*B*x. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOB */ +/* > \verbatim */ +/* > JOB is CHARACTER*1 */ +/* > Specifies the operations to be performed on A and B: */ +/* > = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 */ +/* > and RSCALE(I) = 1.0 for i=1,...,N; */ +/* > = 'P': permute only; */ +/* > = 'S': scale only; */ +/* > = 'B': both permute and scale. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the input matrix A. */ +/* > On exit, A is overwritten by the balanced matrix. */ +/* > If JOB = 'N', A is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,N) */ +/* > On entry, the input matrix B. */ +/* > On exit, B is overwritten by the balanced matrix. */ +/* > If JOB = 'N', B is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > ILO and IHI are set to integers such that on exit */ +/* > A(i,j) = 0 and B(i,j) = 0 if i > j and */ +/* > j = 1,...,ILO-1 or i = IHI+1,...,N. */ +/* > If JOB = 'N' or 'S', ILO = 1 and IHI = N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] LSCALE */ +/* > \verbatim */ +/* > LSCALE is DOUBLE PRECISION array, dimension (N) */ +/* > Details of the permutations and scaling factors applied */ +/* > to the left side of A and B. If P(j) is the index of the */ +/* > row interchanged with row j, and D(j) is the scaling factor */ +/* > applied to row j, then */ +/* > LSCALE(j) = P(j) for J = 1,...,ILO-1 */ +/* > = D(j) for J = ILO,...,IHI */ +/* > = P(j) for J = IHI+1,...,N. */ +/* > The order in which the interchanges are made is N to IHI+1, */ +/* > then 1 to ILO-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RSCALE */ +/* > \verbatim */ +/* > RSCALE is DOUBLE PRECISION array, dimension (N) */ +/* > Details of the permutations and scaling factors applied */ +/* > to the right side of A and B. If P(j) is the index of the */ +/* > column interchanged with column j, and D(j) is the scaling */ +/* > factor applied to column j, then */ +/* > RSCALE(j) = P(j) for J = 1,...,ILO-1 */ +/* > = D(j) for J = ILO,...,IHI */ +/* > = P(j) for J = IHI+1,...,N. */ +/* > The order in which the interchanges are made is N to IHI+1, */ +/* > then 1 to ILO-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (lwork) */ +/* > lwork must be at least f2cmax(1,6*N) when JOB = 'S' or 'B', and */ +/* > at least 1 when JOB = 'N' or 'P'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16GBcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > See R.C. WARD, Balancing the generalized eigenvalue problem, */ +/* > SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zggbal_(char *job, integer *n, doublecomplex *a, integer + *lda, doublecomplex *b, integer *ldb, integer *ilo, integer *ihi, + doublereal *lscale, doublereal *rscale, doublereal *work, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer lcab; + doublereal beta, coef; + integer irab, lrab; + doublereal basl, cmax; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal coef2, coef5; + integer i__, j, k, l, m; + doublereal gamma, t, alpha; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + doublereal sfmin, sfmax; + integer iflow; + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + integer kount; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + integer jc; + doublereal ta, tb, tc; + extern doublereal dlamch_(char *); + integer ir, it; + doublereal ew; + integer nr; + doublereal pgamma; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + integer lsfmin; + extern integer izamax_(integer *, doublecomplex *, integer *); + integer lsfmax, ip1, jp1, lm1; + doublereal cab, rab, ewc, cor, sum; + integer nrp2, icab; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --lscale; + --rscale; + --work; + + /* Function Body */ + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*ldb < f2cmax(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGGBAL", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *ilo = 1; + *ihi = *n; + return 0; + } + + if (*n == 1) { + *ilo = 1; + *ihi = *n; + lscale[1] = 1.; + rscale[1] = 1.; + return 0; + } + + if (lsame_(job, "N")) { + *ilo = 1; + *ihi = *n; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + lscale[i__] = 1.; + rscale[i__] = 1.; +/* L10: */ + } + return 0; + } + + k = 1; + l = *n; + if (lsame_(job, "S")) { + goto L190; + } + + goto L30; + +/* Permute the matrices A and B to isolate the eigenvalues. */ + +/* Find row with one nonzero in columns 1 through L */ + +L20: + l = lm1; + if (l != 1) { + goto L30; + } + + rscale[1] = 1.; + lscale[1] = 1.; + goto L190; + +L30: + lm1 = l - 1; + for (i__ = l; i__ >= 1; --i__) { + i__1 = lm1; + for (j = 1; j <= i__1; ++j) { + jp1 = j + 1; + i__2 = i__ + j * a_dim1; + i__3 = i__ + j * b_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0. || (b[i__3].r != 0. || b[ + i__3].i != 0.)) { + goto L50; + } +/* L40: */ + } + j = l; + goto L70; + +L50: + i__1 = l; + for (j = jp1; j <= i__1; ++j) { + i__2 = i__ + j * a_dim1; + i__3 = i__ + j * b_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0. || (b[i__3].r != 0. || b[ + i__3].i != 0.)) { + goto L80; + } +/* L60: */ + } + j = jp1 - 1; + +L70: + m = l; + iflow = 1; + goto L160; +L80: + ; + } + goto L100; + +/* Find column with one nonzero in rows K through N */ + +L90: + ++k; + +L100: + i__1 = l; + for (j = k; j <= i__1; ++j) { + i__2 = lm1; + for (i__ = k; i__ <= i__2; ++i__) { + ip1 = i__ + 1; + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * b_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != 0. || b[ + i__4].i != 0.)) { + goto L120; + } +/* L110: */ + } + i__ = l; + goto L140; +L120: + i__2 = l; + for (i__ = ip1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * b_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != 0. || b[ + i__4].i != 0.)) { + goto L150; + } +/* L130: */ + } + i__ = ip1 - 1; +L140: + m = k; + iflow = 2; + goto L160; +L150: + ; + } + goto L190; + +/* Permute rows M and I */ + +L160: + lscale[m] = (doublereal) i__; + if (i__ == m) { + goto L170; + } + i__1 = *n - k + 1; + zswap_(&i__1, &a[i__ + k * a_dim1], lda, &a[m + k * a_dim1], lda); + i__1 = *n - k + 1; + zswap_(&i__1, &b[i__ + k * b_dim1], ldb, &b[m + k * b_dim1], ldb); + +/* Permute columns M and J */ + +L170: + rscale[m] = (doublereal) j; + if (j == m) { + goto L180; + } + zswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + zswap_(&l, &b[j * b_dim1 + 1], &c__1, &b[m * b_dim1 + 1], &c__1); + +L180: + switch (iflow) { + case 1: goto L20; + case 2: goto L90; + } + +L190: + *ilo = k; + *ihi = l; + + if (lsame_(job, "P")) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + lscale[i__] = 1.; + rscale[i__] = 1.; +/* L195: */ + } + return 0; + } + + if (*ilo == *ihi) { + return 0; + } + +/* Balance the submatrix in rows ILO to IHI. */ + + nr = *ihi - *ilo + 1; + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + rscale[i__] = 0.; + lscale[i__] = 0.; + + work[i__] = 0.; + work[i__ + *n] = 0.; + work[i__ + (*n << 1)] = 0.; + work[i__ + *n * 3] = 0.; + work[i__ + (*n << 2)] = 0.; + work[i__ + *n * 5] = 0.; +/* L200: */ + } + +/* Compute right side vector in resulting linear equations */ + + basl = d_lg10(&c_b36); + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + i__2 = *ihi; + for (j = *ilo; j <= i__2; ++j) { + i__3 = i__ + j * a_dim1; + if (a[i__3].r == 0. && a[i__3].i == 0.) { + ta = 0.; + goto L210; + } + i__3 = i__ + j * a_dim1; + d__3 = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[i__ + j * + a_dim1]), abs(d__2)); + ta = d_lg10(&d__3) / basl; + +L210: + i__3 = i__ + j * b_dim1; + if (b[i__3].r == 0. && b[i__3].i == 0.) { + tb = 0.; + goto L220; + } + i__3 = i__ + j * b_dim1; + d__3 = (d__1 = b[i__3].r, abs(d__1)) + (d__2 = d_imag(&b[i__ + j * + b_dim1]), abs(d__2)); + tb = d_lg10(&d__3) / basl; + +L220: + work[i__ + (*n << 2)] = work[i__ + (*n << 2)] - ta - tb; + work[j + *n * 5] = work[j + *n * 5] - ta - tb; +/* L230: */ + } +/* L240: */ + } + + coef = 1. / (doublereal) (nr << 1); + coef2 = coef * coef; + coef5 = coef2 * .5; + nrp2 = nr + 2; + beta = 0.; + it = 1; + +/* Start generalized conjugate gradient iteration */ + +L250: + + gamma = ddot_(&nr, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + (*n << 2)] + , &c__1) + ddot_(&nr, &work[*ilo + *n * 5], &c__1, &work[*ilo + * + n * 5], &c__1); + + ew = 0.; + ewc = 0.; + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + ew += work[i__ + (*n << 2)]; + ewc += work[i__ + *n * 5]; +/* L260: */ + } + +/* Computing 2nd power */ + d__1 = ew; +/* Computing 2nd power */ + d__2 = ewc; +/* Computing 2nd power */ + d__3 = ew - ewc; + gamma = coef * gamma - coef2 * (d__1 * d__1 + d__2 * d__2) - coef5 * ( + d__3 * d__3); + if (gamma == 0.) { + goto L350; + } + if (it != 1) { + beta = gamma / pgamma; + } + t = coef5 * (ewc - ew * 3.); + tc = coef5 * (ew - ewc * 3.); + + dscal_(&nr, &beta, &work[*ilo], &c__1); + dscal_(&nr, &beta, &work[*ilo + *n], &c__1); + + daxpy_(&nr, &coef, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + *n], & + c__1); + daxpy_(&nr, &coef, &work[*ilo + *n * 5], &c__1, &work[*ilo], &c__1); + + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + work[i__] += tc; + work[i__ + *n] += t; +/* L270: */ + } + +/* Apply matrix to vector */ + + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + kount = 0; + sum = 0.; + i__2 = *ihi; + for (j = *ilo; j <= i__2; ++j) { + i__3 = i__ + j * a_dim1; + if (a[i__3].r == 0. && a[i__3].i == 0.) { + goto L280; + } + ++kount; + sum += work[j]; +L280: + i__3 = i__ + j * b_dim1; + if (b[i__3].r == 0. && b[i__3].i == 0.) { + goto L290; + } + ++kount; + sum += work[j]; +L290: + ; + } + work[i__ + (*n << 1)] = (doublereal) kount * work[i__ + *n] + sum; +/* L300: */ + } + + i__1 = *ihi; + for (j = *ilo; j <= i__1; ++j) { + kount = 0; + sum = 0.; + i__2 = *ihi; + for (i__ = *ilo; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + if (a[i__3].r == 0. && a[i__3].i == 0.) { + goto L310; + } + ++kount; + sum += work[i__ + *n]; +L310: + i__3 = i__ + j * b_dim1; + if (b[i__3].r == 0. && b[i__3].i == 0.) { + goto L320; + } + ++kount; + sum += work[i__ + *n]; +L320: + ; + } + work[j + *n * 3] = (doublereal) kount * work[j] + sum; +/* L330: */ + } + + sum = ddot_(&nr, &work[*ilo + *n], &c__1, &work[*ilo + (*n << 1)], &c__1) + + ddot_(&nr, &work[*ilo], &c__1, &work[*ilo + *n * 3], &c__1); + alpha = gamma / sum; + +/* Determine correction to current iteration */ + + cmax = 0.; + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + cor = alpha * work[i__ + *n]; + if (abs(cor) > cmax) { + cmax = abs(cor); + } + lscale[i__] += cor; + cor = alpha * work[i__]; + if (abs(cor) > cmax) { + cmax = abs(cor); + } + rscale[i__] += cor; +/* L340: */ + } + if (cmax < .5) { + goto L350; + } + + d__1 = -alpha; + daxpy_(&nr, &d__1, &work[*ilo + (*n << 1)], &c__1, &work[*ilo + (*n << 2)] + , &c__1); + d__1 = -alpha; + daxpy_(&nr, &d__1, &work[*ilo + *n * 3], &c__1, &work[*ilo + *n * 5], & + c__1); + + pgamma = gamma; + ++it; + if (it <= nrp2) { + goto L250; + } + +/* End generalized conjugate gradient iteration */ + +L350: + sfmin = dlamch_("S"); + sfmax = 1. / sfmin; + lsfmin = (integer) (d_lg10(&sfmin) / basl + 1.); + lsfmax = (integer) (d_lg10(&sfmax) / basl); + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + i__2 = *n - *ilo + 1; + irab = izamax_(&i__2, &a[i__ + *ilo * a_dim1], lda); + rab = z_abs(&a[i__ + (irab + *ilo - 1) * a_dim1]); + i__2 = *n - *ilo + 1; + irab = izamax_(&i__2, &b[i__ + *ilo * b_dim1], ldb); +/* Computing MAX */ + d__1 = rab, d__2 = z_abs(&b[i__ + (irab + *ilo - 1) * b_dim1]); + rab = f2cmax(d__1,d__2); + d__1 = rab + sfmin; + lrab = (integer) (d_lg10(&d__1) / basl + 1.); + ir = (integer) (lscale[i__] + d_sign(&c_b72, &lscale[i__])); +/* Computing MIN */ + i__2 = f2cmax(ir,lsfmin), i__2 = f2cmin(i__2,lsfmax), i__3 = lsfmax - lrab; + ir = f2cmin(i__2,i__3); + lscale[i__] = pow_di(&c_b36, &ir); + icab = izamax_(ihi, &a[i__ * a_dim1 + 1], &c__1); + cab = z_abs(&a[icab + i__ * a_dim1]); + icab = izamax_(ihi, &b[i__ * b_dim1 + 1], &c__1); +/* Computing MAX */ + d__1 = cab, d__2 = z_abs(&b[icab + i__ * b_dim1]); + cab = f2cmax(d__1,d__2); + d__1 = cab + sfmin; + lcab = (integer) (d_lg10(&d__1) / basl + 1.); + jc = (integer) (rscale[i__] + d_sign(&c_b72, &rscale[i__])); +/* Computing MIN */ + i__2 = f2cmax(jc,lsfmin), i__2 = f2cmin(i__2,lsfmax), i__3 = lsfmax - lcab; + jc = f2cmin(i__2,i__3); + rscale[i__] = pow_di(&c_b36, &jc); +/* L360: */ + } + +/* Row scaling of matrices A and B */ + + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + i__2 = *n - *ilo + 1; + zdscal_(&i__2, &lscale[i__], &a[i__ + *ilo * a_dim1], lda); + i__2 = *n - *ilo + 1; + zdscal_(&i__2, &lscale[i__], &b[i__ + *ilo * b_dim1], ldb); +/* L370: */ + } + +/* Column scaling of matrices A and B */ + + i__1 = *ihi; + for (j = *ilo; j <= i__1; ++j) { + zdscal_(ihi, &rscale[j], &a[j * a_dim1 + 1], &c__1); + zdscal_(ihi, &rscale[j], &b[j * b_dim1 + 1], &c__1); +/* L380: */ + } + + return 0; + +/* End of ZGGBAL */ + +} /* zggbal_ */ + diff --git a/lapack-netlib/SRC/zgges.c b/lapack-netlib/SRC/zgges.c new file mode 100644 index 000000000..acac6525e --- /dev/null +++ b/lapack-netlib/SRC/zgges.c @@ -0,0 +1,1077 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGGES computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors f +or GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGGES + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, */ +/* SDIM, ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK, */ +/* LWORK, RWORK, BWORK, INFO ) */ + +/* CHARACTER JOBVSL, JOBVSR, SORT */ +/* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM */ +/* LOGICAL BWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ) */ +/* COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), */ +/* $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), */ +/* $ WORK( * ) */ +/* LOGICAL SELCTG */ +/* EXTERNAL SELCTG */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGGES computes for a pair of N-by-N complex nonsymmetric matrices */ +/* > (A,B), the generalized eigenvalues, the generalized complex Schur */ +/* > form (S, T), and optionally left and/or right Schur vectors (VSL */ +/* > and VSR). This gives the generalized Schur factorization */ +/* > */ +/* > (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) */ +/* > */ +/* > where (VSR)**H is the conjugate-transpose of VSR. */ +/* > */ +/* > Optionally, it also orders the eigenvalues so that a selected cluster */ +/* > of eigenvalues appears in the leading diagonal blocks of the upper */ +/* > triangular matrix S and the upper triangular matrix T. The leading */ +/* > columns of VSL and VSR then form an unitary basis for the */ +/* > corresponding left and right eigenspaces (deflating subspaces). */ +/* > */ +/* > (If only the generalized eigenvalues are needed, use the driver */ +/* > ZGGEV instead, which is faster.) */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar w */ +/* > or a ratio alpha/beta = w, such that A - w*B is singular. It is */ +/* > usually represented as the pair (alpha,beta), as there is a */ +/* > reasonable interpretation for beta=0, and even for both being zero. */ +/* > */ +/* > A pair of matrices (S,T) is in generalized complex Schur form if S */ +/* > and T are upper triangular and, in addition, the diagonal elements */ +/* > of T are non-negative real numbers. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVSL */ +/* > \verbatim */ +/* > JOBVSL is CHARACTER*1 */ +/* > = 'N': do not compute the left Schur vectors; */ +/* > = 'V': compute the left Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVSR */ +/* > \verbatim */ +/* > JOBVSR is CHARACTER*1 */ +/* > = 'N': do not compute the right Schur vectors; */ +/* > = 'V': compute the right Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SORT */ +/* > \verbatim */ +/* > SORT is CHARACTER*1 */ +/* > Specifies whether or not to order the eigenvalues on the */ +/* > diagonal of the generalized Schur form. */ +/* > = 'N': Eigenvalues are not ordered; */ +/* > = 'S': Eigenvalues are ordered (see SELCTG). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SELCTG */ +/* > \verbatim */ +/* > SELCTG is a LOGICAL FUNCTION of two COMPLEX*16 arguments */ +/* > SELCTG must be declared EXTERNAL in the calling subroutine. */ +/* > If SORT = 'N', SELCTG is not referenced. */ +/* > If SORT = 'S', SELCTG is used to select eigenvalues to sort */ +/* > to the top left of the Schur form. */ +/* > An eigenvalue ALPHA(j)/BETA(j) is selected if */ +/* > SELCTG(ALPHA(j),BETA(j)) is true. */ +/* > */ +/* > Note that a selected complex eigenvalue may no longer satisfy */ +/* > SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since */ +/* > ordering may change the value of complex eigenvalues */ +/* > (especially if the eigenvalue is ill-conditioned), in this */ +/* > case INFO is set to N+2 (See INFO below). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VSL, and VSR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the first of the pair of matrices. */ +/* > On exit, A has been overwritten by its generalized Schur */ +/* > form S. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB, N) */ +/* > On entry, the second of the pair of matrices. */ +/* > On exit, B has been overwritten by its generalized Schur */ +/* > form T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SDIM */ +/* > \verbatim */ +/* > SDIM is INTEGER */ +/* > If SORT = 'N', SDIM = 0. */ +/* > If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* > for which SELCTG is true. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX*16 array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is COMPLEX*16 array, dimension (N) */ +/* > On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the */ +/* > generalized eigenvalues. ALPHA(j), j=1,...,N and BETA(j), */ +/* > j=1,...,N are the diagonals of the complex Schur form (A,B) */ +/* > output by ZGGES. The BETA(j) will be non-negative real. */ +/* > */ +/* > Note: the quotients ALPHA(j)/BETA(j) may easily over- or */ +/* > underflow, and BETA(j) may even be zero. Thus, the user */ +/* > should avoid naively computing the ratio alpha/beta. */ +/* > However, ALPHA will be always less than and usually */ +/* > comparable with norm(A) in magnitude, and BETA always less */ +/* > than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSL */ +/* > \verbatim */ +/* > VSL is COMPLEX*16 array, dimension (LDVSL,N) */ +/* > If JOBVSL = 'V', VSL will contain the left Schur vectors. */ +/* > Not referenced if JOBVSL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSL */ +/* > \verbatim */ +/* > LDVSL is INTEGER */ +/* > The leading dimension of the matrix VSL. LDVSL >= 1, and */ +/* > if JOBVSL = 'V', LDVSL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSR */ +/* > \verbatim */ +/* > VSR is COMPLEX*16 array, dimension (LDVSR,N) */ +/* > If JOBVSR = 'V', VSR will contain the right Schur vectors. */ +/* > Not referenced if JOBVSR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSR */ +/* > \verbatim */ +/* > LDVSR is INTEGER */ +/* > The leading dimension of the matrix VSR. LDVSR >= 1, and */ +/* > if JOBVSR = 'V', LDVSR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,2*N). */ +/* > For good performance, LWORK must generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (8*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BWORK */ +/* > \verbatim */ +/* > BWORK is LOGICAL array, dimension (N) */ +/* > Not referenced if SORT = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > =1,...,N: */ +/* > The QZ iteration failed. (A,B) are not in Schur */ +/* > form, but ALPHA(j) and BETA(j) should be correct for */ +/* > j=INFO+1,...,N. */ +/* > > N: =N+1: other than QZ iteration failed in ZHGEQZ */ +/* > =N+2: after reordering, roundoff changed values of */ +/* > some complex eigenvalues so that leading */ +/* > eigenvalues in the Generalized Schur form no */ +/* > longer satisfy SELCTG=.TRUE. This could also */ +/* > be caused due to scaling. */ +/* > =N+3: reordering failed in ZTGSEN. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int zgges_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, integer *sdim, doublecomplex *alpha, doublecomplex * + beta, doublecomplex *vsl, integer *ldvsl, doublecomplex *vsr, integer + *ldvsr, doublecomplex *work, integer *lwork, doublereal *rwork, + logical *bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, + vsr_dim1, vsr_offset, i__1, i__2; + + /* Local variables */ + doublereal anrm, bnrm; + integer idum[1], ierr, itau, iwrk; + doublereal pvsl, pvsr; + integer i__; + extern logical lsame_(char *, char *); + integer ileft, icols; + logical cursl, ilvsl, ilvsr; + integer irwrk, irows; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *); + extern /* Subroutine */ int zggbak_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublecomplex *, + integer *, integer *), zggbal_(char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + , integer *, doublereal *, doublereal *, doublereal *, integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + doublereal bignum; + integer ijobvl, iright; + extern /* Subroutine */ int zgghrd_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ), zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex *, + integer *, integer *); + integer ijobvr; + extern /* Subroutine */ int zgeqrf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ); + doublereal anrmto; + integer lwkmin; + logical lastsl; + doublereal bnrmto; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *), zhgeqz_( + char *, char *, char *, integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, integer *), ztgsen_(integer + *, logical *, logical *, logical *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublecomplex *, integer *, integer *, integer *, integer *); + doublereal smlnum; + logical wantst, lquery; + integer lwkopt; + extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *), zunmqr_(char *, char *, integer *, integer + *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + doublereal dif[2]; + integer ihi, ilo; + doublereal eps; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alpha; + --beta; + vsl_dim1 = *ldvsl; + vsl_offset = 1 + vsl_dim1 * 1; + vsl -= vsl_offset; + vsr_dim1 = *ldvsr; + vsr_offset = 1 + vsr_dim1 * 1; + vsr -= vsr_offset; + --work; + --rwork; + --bwork; + + /* Function Body */ + if (lsame_(jobvsl, "N")) { + ijobvl = 1; + ilvsl = FALSE_; + } else if (lsame_(jobvsl, "V")) { + ijobvl = 2; + ilvsl = TRUE_; + } else { + ijobvl = -1; + ilvsl = FALSE_; + } + + if (lsame_(jobvsr, "N")) { + ijobvr = 1; + ilvsr = FALSE_; + } else if (lsame_(jobvsr, "V")) { + ijobvr = 2; + ilvsr = TRUE_; + } else { + ijobvr = -1; + ilvsr = FALSE_; + } + + wantst = lsame_(sort, "S"); + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (! wantst && ! lsame_(sort, "N")) { + *info = -3; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { + *info = -14; + } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { + *info = -16; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + lwkmin = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", n, &c__1, n, + &c__0, (ftnlen)6, (ftnlen)1); + lwkopt = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + *n * ilaenv_(&c__1, "ZUNMQR", " ", n, & + c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = f2cmax(i__1,i__2); + if (ilvsl) { +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + *n * ilaenv_(&c__1, "ZUNGQR", " ", n, & + c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = f2cmax(i__1,i__2); + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + if (*lwork < lwkmin && ! lquery) { + *info = -18; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGGES ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = dlamch_("P"); + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = zlange_("M", n, n, &a[a_offset], lda, &rwork[1]); + ilascl = FALSE_; + if (anrm > 0. && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + + if (ilascl) { + zlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = zlange_("M", n, n, &b[b_offset], ldb, &rwork[1]); + ilbscl = FALSE_; + if (bnrm > 0. && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + + if (ilbscl) { + zlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ +/* (Real Workspace: need 6*N) */ + + ileft = 1; + iright = *n + 1; + irwrk = iright + *n; + zggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &rwork[ + ileft], &rwork[iright], &rwork[irwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ +/* (Complex Workspace: need N, prefer N*NB) */ + + irows = ihi + 1 - ilo; + icols = *n + 1 - ilo; + itau = 1; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + zgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to matrix A */ +/* (Complex Workspace: need N, prefer N*NB) */ + + i__1 = *lwork + 1 - iwrk; + zunmqr_("L", "C", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VSL */ +/* (Complex Workspace: need N, prefer N*NB) */ + + if (ilvsl) { + zlaset_("Full", n, n, &c_b1, &c_b2, &vsl[vsl_offset], ldvsl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + zlacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[ + ilo + 1 + ilo * vsl_dim1], ldvsl); + } + i__1 = *lwork + 1 - iwrk; + zungqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, & + work[itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VSR */ + + if (ilvsr) { + zlaset_("Full", n, n, &c_b1, &c_b2, &vsr[vsr_offset], ldvsr); + } + +/* Reduce to generalized Hessenberg form */ +/* (Workspace: none needed) */ + + zgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &ierr); + + *sdim = 0; + +/* Perform QZ algorithm, computing Schur vectors if desired */ +/* (Complex Workspace: need N) */ +/* (Real Workspace: need N) */ + + iwrk = itau; + i__1 = *lwork + 1 - iwrk; + zhgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, & + vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &rwork[irwrk], &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L30; + } + +/* Sort eigenvalues ALPHA/BETA if desired */ +/* (Workspace: none needed) */ + + if (wantst) { + +/* Undo scaling on eigenvalues before selecting */ + + if (ilascl) { + zlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, &c__1, &alpha[1], n, + &ierr); + } + if (ilbscl) { + zlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, &c__1, &beta[1], n, + &ierr); + } + +/* Select eigenvalues */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*selctg)(&alpha[i__], &beta[i__]); +/* L10: */ + } + + i__1 = *lwork - iwrk + 1; + ztgsen_(&c__0, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, + &vsr[vsr_offset], ldvsr, sdim, &pvsl, &pvsr, dif, &work[iwrk], + &i__1, idum, &c__1, &ierr); + if (ierr == 1) { + *info = *n + 3; + } + + } + +/* Apply back-permutation to VSL and VSR */ +/* (Workspace: none needed) */ + + if (ilvsl) { + zggbak_("P", "L", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, & + vsl[vsl_offset], ldvsl, &ierr); + } + if (ilvsr) { + zggbak_("P", "R", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, & + vsr[vsr_offset], ldvsr, &ierr); + } + +/* Undo scaling */ + + if (ilascl) { + zlascl_("U", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, & + ierr); + zlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, & + ierr); + } + + if (ilbscl) { + zlascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & + ierr); + zlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + if (wantst) { + +/* Check if reordering is correct */ + + lastsl = TRUE_; + *sdim = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*selctg)(&alpha[i__], &beta[i__]); + if (cursl) { + ++(*sdim); + } + if (cursl && ! lastsl) { + *info = *n + 2; + } + lastsl = cursl; +/* L20: */ + } + + } + +L30: + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + return 0; + +/* End of ZGGES */ + +} /* zgges_ */ + diff --git a/lapack-netlib/SRC/zgges3.c b/lapack-netlib/SRC/zgges3.c new file mode 100644 index 000000000..266406b9a --- /dev/null +++ b/lapack-netlib/SRC/zgges3.c @@ -0,0 +1,1086 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGGES3 computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors +for GE matrices (blocked algorithm) */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGGES3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGGES3( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, */ +/* $ LDB, SDIM, ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, */ +/* $ WORK, LWORK, RWORK, BWORK, INFO ) */ + +/* CHARACTER JOBVSL, JOBVSR, SORT */ +/* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM */ +/* LOGICAL BWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ) */ +/* COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), */ +/* $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), */ +/* $ WORK( * ) */ +/* LOGICAL SELCTG */ +/* EXTERNAL SELCTG */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGGES3 computes for a pair of N-by-N complex nonsymmetric matrices */ +/* > (A,B), the generalized eigenvalues, the generalized complex Schur */ +/* > form (S, T), and optionally left and/or right Schur vectors (VSL */ +/* > and VSR). This gives the generalized Schur factorization */ +/* > */ +/* > (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) */ +/* > */ +/* > where (VSR)**H is the conjugate-transpose of VSR. */ +/* > */ +/* > Optionally, it also orders the eigenvalues so that a selected cluster */ +/* > of eigenvalues appears in the leading diagonal blocks of the upper */ +/* > triangular matrix S and the upper triangular matrix T. The leading */ +/* > columns of VSL and VSR then form an unitary basis for the */ +/* > corresponding left and right eigenspaces (deflating subspaces). */ +/* > */ +/* > (If only the generalized eigenvalues are needed, use the driver */ +/* > ZGGEV instead, which is faster.) */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar w */ +/* > or a ratio alpha/beta = w, such that A - w*B is singular. It is */ +/* > usually represented as the pair (alpha,beta), as there is a */ +/* > reasonable interpretation for beta=0, and even for both being zero. */ +/* > */ +/* > A pair of matrices (S,T) is in generalized complex Schur form if S */ +/* > and T are upper triangular and, in addition, the diagonal elements */ +/* > of T are non-negative real numbers. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVSL */ +/* > \verbatim */ +/* > JOBVSL is CHARACTER*1 */ +/* > = 'N': do not compute the left Schur vectors; */ +/* > = 'V': compute the left Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVSR */ +/* > \verbatim */ +/* > JOBVSR is CHARACTER*1 */ +/* > = 'N': do not compute the right Schur vectors; */ +/* > = 'V': compute the right Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SORT */ +/* > \verbatim */ +/* > SORT is CHARACTER*1 */ +/* > Specifies whether or not to order the eigenvalues on the */ +/* > diagonal of the generalized Schur form. */ +/* > = 'N': Eigenvalues are not ordered; */ +/* > = 'S': Eigenvalues are ordered (see SELCTG). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SELCTG */ +/* > \verbatim */ +/* > SELCTG is a LOGICAL FUNCTION of two COMPLEX*16 arguments */ +/* > SELCTG must be declared EXTERNAL in the calling subroutine. */ +/* > If SORT = 'N', SELCTG is not referenced. */ +/* > If SORT = 'S', SELCTG is used to select eigenvalues to sort */ +/* > to the top left of the Schur form. */ +/* > An eigenvalue ALPHA(j)/BETA(j) is selected if */ +/* > SELCTG(ALPHA(j),BETA(j)) is true. */ +/* > */ +/* > Note that a selected complex eigenvalue may no longer satisfy */ +/* > SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since */ +/* > ordering may change the value of complex eigenvalues */ +/* > (especially if the eigenvalue is ill-conditioned), in this */ +/* > case INFO is set to N+2 (See INFO below). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VSL, and VSR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the first of the pair of matrices. */ +/* > On exit, A has been overwritten by its generalized Schur */ +/* > form S. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB, N) */ +/* > On entry, the second of the pair of matrices. */ +/* > On exit, B has been overwritten by its generalized Schur */ +/* > form T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SDIM */ +/* > \verbatim */ +/* > SDIM is INTEGER */ +/* > If SORT = 'N', SDIM = 0. */ +/* > If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* > for which SELCTG is true. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX*16 array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is COMPLEX*16 array, dimension (N) */ +/* > On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the */ +/* > generalized eigenvalues. ALPHA(j), j=1,...,N and BETA(j), */ +/* > j=1,...,N are the diagonals of the complex Schur form (A,B) */ +/* > output by ZGGES3. The BETA(j) will be non-negative real. */ +/* > */ +/* > Note: the quotients ALPHA(j)/BETA(j) may easily over- or */ +/* > underflow, and BETA(j) may even be zero. Thus, the user */ +/* > should avoid naively computing the ratio alpha/beta. */ +/* > However, ALPHA will be always less than and usually */ +/* > comparable with norm(A) in magnitude, and BETA always less */ +/* > than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSL */ +/* > \verbatim */ +/* > VSL is COMPLEX*16 array, dimension (LDVSL,N) */ +/* > If JOBVSL = 'V', VSL will contain the left Schur vectors. */ +/* > Not referenced if JOBVSL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSL */ +/* > \verbatim */ +/* > LDVSL is INTEGER */ +/* > The leading dimension of the matrix VSL. LDVSL >= 1, and */ +/* > if JOBVSL = 'V', LDVSL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSR */ +/* > \verbatim */ +/* > VSR is COMPLEX*16 array, dimension (LDVSR,N) */ +/* > If JOBVSR = 'V', VSR will contain the right Schur vectors. */ +/* > Not referenced if JOBVSR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSR */ +/* > \verbatim */ +/* > LDVSR is INTEGER */ +/* > The leading dimension of the matrix VSR. LDVSR >= 1, and */ +/* > if JOBVSR = 'V', LDVSR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (8*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BWORK */ +/* > \verbatim */ +/* > BWORK is LOGICAL array, dimension (N) */ +/* > Not referenced if SORT = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > =1,...,N: */ +/* > The QZ iteration failed. (A,B) are not in Schur */ +/* > form, but ALPHA(j) and BETA(j) should be correct for */ +/* > j=INFO+1,...,N. */ +/* > > N: =N+1: other than QZ iteration failed in ZHGEQZ */ +/* > =N+2: after reordering, roundoff changed values of */ +/* > some complex eigenvalues so that leading */ +/* > eigenvalues in the Generalized Schur form no */ +/* > longer satisfy SELCTG=.TRUE. This could also */ +/* > be caused due to scaling. */ +/* > =N+3: reordering failed in ZTGSEN. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date January 2015 */ + +/* > \ingroup complex16GEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int zgges3_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, integer *sdim, doublecomplex *alpha, doublecomplex * + beta, doublecomplex *vsl, integer *ldvsl, doublecomplex *vsr, integer + *ldvsr, doublecomplex *work, integer *lwork, doublereal *rwork, + logical *bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, + vsr_dim1, vsr_offset, i__1, i__2; + doublecomplex z__1; + + /* Local variables */ + doublereal anrm, bnrm; + integer idum[1], ierr, itau, iwrk; + doublereal pvsl, pvsr; + integer i__; + extern logical lsame_(char *, char *); + integer ileft, icols; + logical cursl, ilvsl, ilvsr; + integer irwrk, irows; + extern /* Subroutine */ int zgghd3_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *), dlabad_( + doublereal *, doublereal *); + extern doublereal dlamch_(char *); + extern /* Subroutine */ int zggbak_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublecomplex *, + integer *, integer *), zggbal_(char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + , integer *, doublereal *, doublereal *, doublereal *, integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + doublereal bignum; + integer ijobvl, iright; + extern /* Subroutine */ int zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex *, + integer *, integer *); + integer ijobvr; + extern /* Subroutine */ int zgeqrf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ); + doublereal anrmto, bnrmto; + logical lastsl; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *), zhgeqz_( + char *, char *, char *, integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, integer *), ztgsen_(integer + *, logical *, logical *, logical *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublecomplex *, integer *, integer *, integer *, integer *); + doublereal smlnum; + logical wantst, lquery; + integer lwkopt; + extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *), zunmqr_(char *, char *, integer *, integer + *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + doublereal dif[2]; + integer ihi, ilo; + doublereal eps; + + +/* -- LAPACK driver routine (version 3.6.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* January 2015 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alpha; + --beta; + vsl_dim1 = *ldvsl; + vsl_offset = 1 + vsl_dim1 * 1; + vsl -= vsl_offset; + vsr_dim1 = *ldvsr; + vsr_offset = 1 + vsr_dim1 * 1; + vsr -= vsr_offset; + --work; + --rwork; + --bwork; + + /* Function Body */ + if (lsame_(jobvsl, "N")) { + ijobvl = 1; + ilvsl = FALSE_; + } else if (lsame_(jobvsl, "V")) { + ijobvl = 2; + ilvsl = TRUE_; + } else { + ijobvl = -1; + ilvsl = FALSE_; + } + + if (lsame_(jobvsr, "N")) { + ijobvr = 1; + ilvsr = FALSE_; + } else if (lsame_(jobvsr, "V")) { + ijobvr = 2; + ilvsr = TRUE_; + } else { + ijobvr = -1; + ilvsr = FALSE_; + } + + wantst = lsame_(sort, "S"); + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (! wantst && ! lsame_(sort, "N")) { + *info = -3; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { + *info = -14; + } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { + *info = -16; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -18; + } + } + +/* Compute workspace */ + + if (*info == 0) { + zgeqrf_(n, n, &b[b_offset], ldb, &work[1], &work[1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = 1, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + zunmqr_("L", "C", n, n, n, &b[b_offset], ldb, &work[1], &a[a_offset], + lda, &work[1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + if (ilvsl) { + zungqr_(n, n, n, &vsl[vsl_offset], ldvsl, &work[1], &work[1], & + c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + } + zgghd3_(jobvsl, jobvsr, n, &c__1, n, &a[a_offset], lda, &b[b_offset], + ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &work[ + 1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + zhgeqz_("S", jobvsl, jobvsr, n, &c__1, n, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, + &vsr[vsr_offset], ldvsr, &work[1], &c_n1, &rwork[1], &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + if (wantst) { + ztgsen_(&c__0, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, & + b[b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], + ldvsl, &vsr[vsr_offset], ldvsr, sdim, &pvsl, &pvsr, dif, & + work[1], &c_n1, idum, &c__1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + } + z__1.r = (doublereal) lwkopt, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGGES3 ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = dlamch_("P"); + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = zlange_("M", n, n, &a[a_offset], lda, &rwork[1]); + ilascl = FALSE_; + if (anrm > 0. && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + + if (ilascl) { + zlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = zlange_("M", n, n, &b[b_offset], ldb, &rwork[1]); + ilbscl = FALSE_; + if (bnrm > 0. && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + + if (ilbscl) { + zlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ + + ileft = 1; + iright = *n + 1; + irwrk = iright + *n; + zggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &rwork[ + ileft], &rwork[iright], &rwork[irwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ + + irows = ihi + 1 - ilo; + icols = *n + 1 - ilo; + itau = 1; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + zgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to matrix A */ + + i__1 = *lwork + 1 - iwrk; + zunmqr_("L", "C", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VSL */ + + if (ilvsl) { + zlaset_("Full", n, n, &c_b1, &c_b2, &vsl[vsl_offset], ldvsl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + zlacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[ + ilo + 1 + ilo * vsl_dim1], ldvsl); + } + i__1 = *lwork + 1 - iwrk; + zungqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, & + work[itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VSR */ + + if (ilvsr) { + zlaset_("Full", n, n, &c_b1, &c_b2, &vsr[vsr_offset], ldvsr); + } + +/* Reduce to generalized Hessenberg form */ + + i__1 = *lwork + 1 - iwrk; + zgghd3_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &work[iwrk] + , &i__1, &ierr); + + *sdim = 0; + +/* Perform QZ algorithm, computing Schur vectors if desired */ + + iwrk = itau; + i__1 = *lwork + 1 - iwrk; + zhgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, & + vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &rwork[irwrk], &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L30; + } + +/* Sort eigenvalues ALPHA/BETA if desired */ + + if (wantst) { + +/* Undo scaling on eigenvalues before selecting */ + + if (ilascl) { + zlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, &c__1, &alpha[1], n, + &ierr); + } + if (ilbscl) { + zlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, &c__1, &beta[1], n, + &ierr); + } + +/* Select eigenvalues */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*selctg)(&alpha[i__], &beta[i__]); +/* L10: */ + } + + i__1 = *lwork - iwrk + 1; + ztgsen_(&c__0, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, + &vsr[vsr_offset], ldvsr, sdim, &pvsl, &pvsr, dif, &work[iwrk], + &i__1, idum, &c__1, &ierr); + if (ierr == 1) { + *info = *n + 3; + } + + } + +/* Apply back-permutation to VSL and VSR */ + + if (ilvsl) { + zggbak_("P", "L", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, & + vsl[vsl_offset], ldvsl, &ierr); + } + if (ilvsr) { + zggbak_("P", "R", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, & + vsr[vsr_offset], ldvsr, &ierr); + } + +/* Undo scaling */ + + if (ilascl) { + zlascl_("U", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, & + ierr); + zlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, & + ierr); + } + + if (ilbscl) { + zlascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & + ierr); + zlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + if (wantst) { + +/* Check if reordering is correct */ + + lastsl = TRUE_; + *sdim = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*selctg)(&alpha[i__], &beta[i__]); + if (cursl) { + ++(*sdim); + } + if (cursl && ! lastsl) { + *info = *n + 2; + } + lastsl = cursl; +/* L20: */ + } + + } + +L30: + + z__1.r = (doublereal) lwkopt, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + + return 0; + +/* End of ZGGES3 */ + +} /* zgges3_ */ + diff --git a/lapack-netlib/SRC/zggesx.c b/lapack-netlib/SRC/zggesx.c new file mode 100644 index 000000000..26b3e46ad --- /dev/null +++ b/lapack-netlib/SRC/zggesx.c @@ -0,0 +1,1199 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGGESX computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors +for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGGESX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA, */ +/* B, LDB, SDIM, ALPHA, BETA, VSL, LDVSL, VSR, */ +/* LDVSR, RCONDE, RCONDV, WORK, LWORK, RWORK, */ +/* IWORK, LIWORK, BWORK, INFO ) */ + +/* CHARACTER JOBVSL, JOBVSR, SENSE, SORT */ +/* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N, */ +/* $ SDIM */ +/* LOGICAL BWORK( * ) */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION RCONDE( 2 ), RCONDV( 2 ), RWORK( * ) */ +/* COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), */ +/* $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), */ +/* $ WORK( * ) */ +/* LOGICAL SELCTG */ +/* EXTERNAL SELCTG */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGGESX computes for a pair of N-by-N complex nonsymmetric matrices */ +/* > (A,B), the generalized eigenvalues, the complex Schur form (S,T), */ +/* > and, optionally, the left and/or right matrices of Schur vectors (VSL */ +/* > and VSR). This gives the generalized Schur factorization */ +/* > */ +/* > (A,B) = ( (VSL) S (VSR)**H, (VSL) T (VSR)**H ) */ +/* > */ +/* > where (VSR)**H is the conjugate-transpose of VSR. */ +/* > */ +/* > Optionally, it also orders the eigenvalues so that a selected cluster */ +/* > of eigenvalues appears in the leading diagonal blocks of the upper */ +/* > triangular matrix S and the upper triangular matrix T; computes */ +/* > a reciprocal condition number for the average of the selected */ +/* > eigenvalues (RCONDE); and computes a reciprocal condition number for */ +/* > the right and left deflating subspaces corresponding to the selected */ +/* > eigenvalues (RCONDV). The leading columns of VSL and VSR then form */ +/* > an orthonormal basis for the corresponding left and right eigenspaces */ +/* > (deflating subspaces). */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar w */ +/* > or a ratio alpha/beta = w, such that A - w*B is singular. It is */ +/* > usually represented as the pair (alpha,beta), as there is a */ +/* > reasonable interpretation for beta=0 or for both being zero. */ +/* > */ +/* > A pair of matrices (S,T) is in generalized complex Schur form if T is */ +/* > upper triangular with non-negative diagonal and S is upper */ +/* > triangular. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVSL */ +/* > \verbatim */ +/* > JOBVSL is CHARACTER*1 */ +/* > = 'N': do not compute the left Schur vectors; */ +/* > = 'V': compute the left Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVSR */ +/* > \verbatim */ +/* > JOBVSR is CHARACTER*1 */ +/* > = 'N': do not compute the right Schur vectors; */ +/* > = 'V': compute the right Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SORT */ +/* > \verbatim */ +/* > SORT is CHARACTER*1 */ +/* > Specifies whether or not to order the eigenvalues on the */ +/* > diagonal of the generalized Schur form. */ +/* > = 'N': Eigenvalues are not ordered; */ +/* > = 'S': Eigenvalues are ordered (see SELCTG). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SELCTG */ +/* > \verbatim */ +/* > SELCTG is a LOGICAL FUNCTION of two COMPLEX*16 arguments */ +/* > SELCTG must be declared EXTERNAL in the calling subroutine. */ +/* > If SORT = 'N', SELCTG is not referenced. */ +/* > If SORT = 'S', SELCTG is used to select eigenvalues to sort */ +/* > to the top left of the Schur form. */ +/* > Note that a selected complex eigenvalue may no longer satisfy */ +/* > SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since */ +/* > ordering may change the value of complex eigenvalues */ +/* > (especially if the eigenvalue is ill-conditioned), in this */ +/* > case INFO is set to N+3 see INFO below). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SENSE */ +/* > \verbatim */ +/* > SENSE is CHARACTER*1 */ +/* > Determines which reciprocal condition numbers are computed. */ +/* > = 'N': None are computed; */ +/* > = 'E': Computed for average of selected eigenvalues only; */ +/* > = 'V': Computed for selected deflating subspaces only; */ +/* > = 'B': Computed for both. */ +/* > If SENSE = 'E', 'V', or 'B', SORT must equal 'S'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VSL, and VSR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the first of the pair of matrices. */ +/* > On exit, A has been overwritten by its generalized Schur */ +/* > form S. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB, N) */ +/* > On entry, the second of the pair of matrices. */ +/* > On exit, B has been overwritten by its generalized Schur */ +/* > form T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SDIM */ +/* > \verbatim */ +/* > SDIM is INTEGER */ +/* > If SORT = 'N', SDIM = 0. */ +/* > If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* > for which SELCTG is true. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX*16 array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is COMPLEX*16 array, dimension (N) */ +/* > On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the */ +/* > generalized eigenvalues. ALPHA(j) and BETA(j),j=1,...,N are */ +/* > the diagonals of the complex Schur form (S,T). BETA(j) will */ +/* > be non-negative real. */ +/* > */ +/* > Note: the quotients ALPHA(j)/BETA(j) may easily over- or */ +/* > underflow, and BETA(j) may even be zero. Thus, the user */ +/* > should avoid naively computing the ratio alpha/beta. */ +/* > However, ALPHA will be always less than and usually */ +/* > comparable with norm(A) in magnitude, and BETA always less */ +/* > than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSL */ +/* > \verbatim */ +/* > VSL is COMPLEX*16 array, dimension (LDVSL,N) */ +/* > If JOBVSL = 'V', VSL will contain the left Schur vectors. */ +/* > Not referenced if JOBVSL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSL */ +/* > \verbatim */ +/* > LDVSL is INTEGER */ +/* > The leading dimension of the matrix VSL. LDVSL >=1, and */ +/* > if JOBVSL = 'V', LDVSL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSR */ +/* > \verbatim */ +/* > VSR is COMPLEX*16 array, dimension (LDVSR,N) */ +/* > If JOBVSR = 'V', VSR will contain the right Schur vectors. */ +/* > Not referenced if JOBVSR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSR */ +/* > \verbatim */ +/* > LDVSR is INTEGER */ +/* > The leading dimension of the matrix VSR. LDVSR >= 1, and */ +/* > if JOBVSR = 'V', LDVSR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDE */ +/* > \verbatim */ +/* > RCONDE is DOUBLE PRECISION array, dimension ( 2 ) */ +/* > If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the */ +/* > reciprocal condition numbers for the average of the selected */ +/* > eigenvalues. */ +/* > Not referenced if SENSE = 'N' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDV */ +/* > \verbatim */ +/* > RCONDV is DOUBLE PRECISION array, dimension ( 2 ) */ +/* > If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the */ +/* > reciprocal condition number for the selected deflating */ +/* > subspaces. */ +/* > Not referenced if SENSE = 'N' or 'E'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If N = 0, LWORK >= 1, else if SENSE = 'E', 'V', or 'B', */ +/* > LWORK >= MAX(1,2*N,2*SDIM*(N-SDIM)), else */ +/* > LWORK >= MAX(1,2*N). Note that 2*SDIM*(N-SDIM) <= N*N/2. */ +/* > Note also that an error is only returned if */ +/* > LWORK < MAX(1,2*N), but if SENSE = 'E' or 'V' or 'B' this may */ +/* > not be large enough. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the bound on the optimal size of the WORK */ +/* > array and the minimum size of the IWORK array, returns these */ +/* > values as the first entries of the WORK and IWORK arrays, and */ +/* > no error message related to LWORK or LIWORK is issued by */ +/* > XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension ( 8*N ) */ +/* > Real workspace. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > If SENSE = 'N' or N = 0, LIWORK >= 1, otherwise */ +/* > LIWORK >= N+2. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the bound on the optimal size of the */ +/* > WORK array and the minimum size of the IWORK array, returns */ +/* > these values as the first entries of the WORK and IWORK */ +/* > arrays, and no error message related to LWORK or LIWORK is */ +/* > issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BWORK */ +/* > \verbatim */ +/* > BWORK is LOGICAL array, dimension (N) */ +/* > Not referenced if SORT = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > = 1,...,N: */ +/* > The QZ iteration failed. (A,B) are not in Schur */ +/* > form, but ALPHA(j) and BETA(j) should be correct for */ +/* > j=INFO+1,...,N. */ +/* > > N: =N+1: other than QZ iteration failed in ZHGEQZ */ +/* > =N+2: after reordering, roundoff changed values of */ +/* > some complex eigenvalues so that leading */ +/* > eigenvalues in the Generalized Schur form no */ +/* > longer satisfy SELCTG=.TRUE. This could also */ +/* > be caused due to scaling. */ +/* > =N+3: reordering failed in ZTGSEN. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup complex16GEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int zggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, char *sense, integer *n, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, integer *sdim, doublecomplex *alpha, + doublecomplex *beta, doublecomplex *vsl, integer *ldvsl, + doublecomplex *vsr, integer *ldvsr, doublereal *rconde, doublereal * + rcondv, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *iwork, integer *liwork, logical *bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, + vsr_dim1, vsr_offset, i__1, i__2; + + /* Local variables */ + integer ijob; + doublereal anrm, bnrm; + integer ierr, itau, iwrk, lwrk, i__; + extern logical lsame_(char *, char *); + integer ileft, icols; + logical cursl, ilvsl, ilvsr; + integer irwrk, irows; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *); + doublereal pl, pr; + extern /* Subroutine */ int zggbak_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublecomplex *, + integer *, integer *), zggbal_(char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + , integer *, doublereal *, doublereal *, doublereal *, integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + doublereal bignum; + integer ijobvl, iright; + extern /* Subroutine */ int zgghrd_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ), zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex *, + integer *, integer *); + integer ijobvr; + logical wantsb; + integer liwmin; + logical wantse, lastsl; + doublereal anrmto, bnrmto; + extern /* Subroutine */ int zgeqrf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ); + integer maxwrk; + logical wantsn; + integer minwrk; + doublereal smlnum; + extern /* Subroutine */ int zhgeqz_(char *, char *, char *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, integer *), zlacpy_(char *, + integer *, integer *, doublecomplex *, integer *, doublecomplex * + , integer *), zlaset_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *); + logical wantst, lquery, wantsv; + extern /* Subroutine */ int ztgsen_(integer *, logical *, logical *, + logical *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, integer *, doublereal *, + doublereal *, doublereal *, doublecomplex *, integer *, integer *, + integer *, integer *), zungqr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *), zunmqr_(char *, char *, integer *, integer + *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + doublereal dif[2]; + integer ihi, ilo; + doublereal eps; + + +/* -- LAPACK driver routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alpha; + --beta; + vsl_dim1 = *ldvsl; + vsl_offset = 1 + vsl_dim1 * 1; + vsl -= vsl_offset; + vsr_dim1 = *ldvsr; + vsr_offset = 1 + vsr_dim1 * 1; + vsr -= vsr_offset; + --rconde; + --rcondv; + --work; + --rwork; + --iwork; + --bwork; + + /* Function Body */ + if (lsame_(jobvsl, "N")) { + ijobvl = 1; + ilvsl = FALSE_; + } else if (lsame_(jobvsl, "V")) { + ijobvl = 2; + ilvsl = TRUE_; + } else { + ijobvl = -1; + ilvsl = FALSE_; + } + + if (lsame_(jobvsr, "N")) { + ijobvr = 1; + ilvsr = FALSE_; + } else if (lsame_(jobvsr, "V")) { + ijobvr = 2; + ilvsr = TRUE_; + } else { + ijobvr = -1; + ilvsr = FALSE_; + } + + wantst = lsame_(sort, "S"); + wantsn = lsame_(sense, "N"); + wantse = lsame_(sense, "E"); + wantsv = lsame_(sense, "V"); + wantsb = lsame_(sense, "B"); + lquery = *lwork == -1 || *liwork == -1; + if (wantsn) { + ijob = 0; + } else if (wantse) { + ijob = 1; + } else if (wantsv) { + ijob = 2; + } else if (wantsb) { + ijob = 4; + } + +/* Test the input arguments */ + + *info = 0; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (! wantst && ! lsame_(sort, "N")) { + *info = -3; + } else if (! (wantsn || wantse || wantsv || wantsb) || ! wantst && ! + wantsn) { + *info = -5; + } else if (*n < 0) { + *info = -6; + } else if (*lda < f2cmax(1,*n)) { + *info = -8; + } else if (*ldb < f2cmax(1,*n)) { + *info = -10; + } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { + *info = -15; + } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { + *info = -17; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + if (*n > 0) { + minwrk = *n << 1; + maxwrk = *n * (ilaenv_(&c__1, "ZGEQRF", " ", n, &c__1, n, &c__0, ( + ftnlen)6, (ftnlen)1) + 1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * (ilaenv_(&c__1, "ZUNMQR", " ", n, & + c__1, n, &c_n1, (ftnlen)6, (ftnlen)1) + 1); + maxwrk = f2cmax(i__1,i__2); + if (ilvsl) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * (ilaenv_(&c__1, "ZUNGQR", " ", n, & + c__1, n, &c_n1, (ftnlen)6, (ftnlen)1) + 1); + maxwrk = f2cmax(i__1,i__2); + } + lwrk = maxwrk; + if (ijob >= 1) { +/* Computing MAX */ + i__1 = lwrk, i__2 = *n * *n / 2; + lwrk = f2cmax(i__1,i__2); + } + } else { + minwrk = 1; + maxwrk = 1; + lwrk = 1; + } + work[1].r = (doublereal) lwrk, work[1].i = 0.; + if (wantsn || *n == 0) { + liwmin = 1; + } else { + liwmin = *n + 2; + } + iwork[1] = liwmin; + + if (*lwork < minwrk && ! lquery) { + *info = -21; + } else if (*liwork < liwmin && ! lquery) { + *info = -24; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGGESX", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = dlamch_("P"); + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = zlange_("M", n, n, &a[a_offset], lda, &rwork[1]); + ilascl = FALSE_; + if (anrm > 0. && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + zlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = zlange_("M", n, n, &b[b_offset], ldb, &rwork[1]); + ilbscl = FALSE_; + if (bnrm > 0. && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + zlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ +/* (Real Workspace: need 6*N) */ + + ileft = 1; + iright = *n + 1; + irwrk = iright + *n; + zggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &rwork[ + ileft], &rwork[iright], &rwork[irwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ +/* (Complex Workspace: need N, prefer N*NB) */ + + irows = ihi + 1 - ilo; + icols = *n + 1 - ilo; + itau = 1; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + zgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the unitary transformation to matrix A */ +/* (Complex Workspace: need N, prefer N*NB) */ + + i__1 = *lwork + 1 - iwrk; + zunmqr_("L", "C", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VSL */ +/* (Complex Workspace: need N, prefer N*NB) */ + + if (ilvsl) { + zlaset_("Full", n, n, &c_b1, &c_b2, &vsl[vsl_offset], ldvsl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + zlacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[ + ilo + 1 + ilo * vsl_dim1], ldvsl); + } + i__1 = *lwork + 1 - iwrk; + zungqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, & + work[itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VSR */ + + if (ilvsr) { + zlaset_("Full", n, n, &c_b1, &c_b2, &vsr[vsr_offset], ldvsr); + } + +/* Reduce to generalized Hessenberg form */ +/* (Workspace: none needed) */ + + zgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &ierr); + + *sdim = 0; + +/* Perform QZ algorithm, computing Schur vectors if desired */ +/* (Complex Workspace: need N) */ +/* (Real Workspace: need N) */ + + iwrk = itau; + i__1 = *lwork + 1 - iwrk; + zhgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, & + vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &rwork[irwrk], &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L40; + } + +/* Sort eigenvalues ALPHA/BETA and compute the reciprocal of */ +/* condition number(s) */ + + if (wantst) { + +/* Undo scaling on eigenvalues before SELCTGing */ + + if (ilascl) { + zlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, + &ierr); + } + if (ilbscl) { + zlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, + &ierr); + } + +/* Select eigenvalues */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*selctg)(&alpha[i__], &beta[i__]); +/* L10: */ + } + +/* Reorder eigenvalues, transform Generalized Schur vectors, and */ +/* compute reciprocal condition numbers */ +/* (Complex Workspace: If IJOB >= 1, need MAX(1, 2*SDIM*(N-SDIM)) */ +/* otherwise, need 1 ) */ + + i__1 = *lwork - iwrk + 1; + ztgsen_(&ijob, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, + &vsr[vsr_offset], ldvsr, sdim, &pl, &pr, dif, &work[iwrk], & + i__1, &iwork[1], liwork, &ierr); + + if (ijob >= 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*sdim << 1) * (*n - *sdim); + maxwrk = f2cmax(i__1,i__2); + } + if (ierr == -21) { + +/* not enough complex workspace */ + + *info = -21; + } else { + if (ijob == 1 || ijob == 4) { + rconde[1] = pl; + rconde[2] = pr; + } + if (ijob == 2 || ijob == 4) { + rcondv[1] = dif[0]; + rcondv[2] = dif[1]; + } + if (ierr == 1) { + *info = *n + 3; + } + } + + } + +/* Apply permutation to VSL and VSR */ +/* (Workspace: none needed) */ + + if (ilvsl) { + zggbak_("P", "L", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, & + vsl[vsl_offset], ldvsl, &ierr); + } + + if (ilvsr) { + zggbak_("P", "R", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, & + vsr[vsr_offset], ldvsr, &ierr); + } + +/* Undo scaling */ + + if (ilascl) { + zlascl_("U", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, & + ierr); + zlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, & + ierr); + } + + if (ilbscl) { + zlascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & + ierr); + zlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + if (wantst) { + +/* Check if reordering is correct */ + + lastsl = TRUE_; + *sdim = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*selctg)(&alpha[i__], &beta[i__]); + if (cursl) { + ++(*sdim); + } + if (cursl && ! lastsl) { + *info = *n + 2; + } + lastsl = cursl; +/* L30: */ + } + + } + +L40: + + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + iwork[1] = liwmin; + + return 0; + +/* End of ZGGESX */ + +} /* zggesx_ */ + diff --git a/lapack-netlib/SRC/zggev.c b/lapack-netlib/SRC/zggev.c new file mode 100644 index 000000000..baf50004d --- /dev/null +++ b/lapack-netlib/SRC/zggev.c @@ -0,0 +1,1055 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGGEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matr +ices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGGEV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, */ +/* VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) */ + +/* CHARACTER JOBVL, JOBVR */ +/* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N */ +/* DOUBLE PRECISION RWORK( * ) */ +/* COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), */ +/* $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), */ +/* $ WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGGEV computes for a pair of N-by-N complex nonsymmetric matrices */ +/* > (A,B), the generalized eigenvalues, and optionally, the left and/or */ +/* > right generalized eigenvectors. */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar */ +/* > lambda or a ratio alpha/beta = lambda, such that A - lambda*B is */ +/* > singular. It is usually represented as the pair (alpha,beta), as */ +/* > there is a reasonable interpretation for beta=0, and even for both */ +/* > being zero. */ +/* > */ +/* > The right generalized eigenvector v(j) corresponding to the */ +/* > generalized eigenvalue lambda(j) of (A,B) satisfies */ +/* > */ +/* > A * v(j) = lambda(j) * B * v(j). */ +/* > */ +/* > The left generalized eigenvector u(j) corresponding to the */ +/* > generalized eigenvalues lambda(j) of (A,B) satisfies */ +/* > */ +/* > u(j)**H * A = lambda(j) * u(j)**H * B */ +/* > */ +/* > where u(j)**H is the conjugate-transpose of u(j). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVL */ +/* > \verbatim */ +/* > JOBVL is CHARACTER*1 */ +/* > = 'N': do not compute the left generalized eigenvectors; */ +/* > = 'V': compute the left generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVR */ +/* > \verbatim */ +/* > JOBVR is CHARACTER*1 */ +/* > = 'N': do not compute the right generalized eigenvectors; */ +/* > = 'V': compute the right generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VL, and VR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the matrix A in the pair (A,B). */ +/* > On exit, A has been overwritten. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB, N) */ +/* > On entry, the matrix B in the pair (A,B). */ +/* > On exit, B has been overwritten. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX*16 array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is COMPLEX*16 array, dimension (N) */ +/* > On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the */ +/* > generalized eigenvalues. */ +/* > */ +/* > Note: the quotients ALPHA(j)/BETA(j) may easily over- or */ +/* > underflow, and BETA(j) may even be zero. Thus, the user */ +/* > should avoid naively computing the ratio alpha/beta. */ +/* > However, ALPHA will be always less than and usually */ +/* > comparable with norm(A) in magnitude, and BETA always less */ +/* > than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VL */ +/* > \verbatim */ +/* > VL is COMPLEX*16 array, dimension (LDVL,N) */ +/* > If JOBVL = 'V', the left generalized eigenvectors u(j) are */ +/* > stored one after another in the columns of VL, in the same */ +/* > order as their eigenvalues. */ +/* > Each eigenvector is scaled so the largest component has */ +/* > abs(real part) + abs(imag. part) = 1. */ +/* > Not referenced if JOBVL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVL */ +/* > \verbatim */ +/* > LDVL is INTEGER */ +/* > The leading dimension of the matrix VL. LDVL >= 1, and */ +/* > if JOBVL = 'V', LDVL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VR */ +/* > \verbatim */ +/* > VR is COMPLEX*16 array, dimension (LDVR,N) */ +/* > If JOBVR = 'V', the right generalized eigenvectors v(j) are */ +/* > stored one after another in the columns of VR, in the same */ +/* > order as their eigenvalues. */ +/* > Each eigenvector is scaled so the largest component has */ +/* > abs(real part) + abs(imag. part) = 1. */ +/* > Not referenced if JOBVR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVR */ +/* > \verbatim */ +/* > LDVR is INTEGER */ +/* > The leading dimension of the matrix VR. LDVR >= 1, and */ +/* > if JOBVR = 'V', LDVR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,2*N). */ +/* > For good performance, LWORK must generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (8*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > =1,...,N: */ +/* > The QZ iteration failed. No eigenvectors have been */ +/* > calculated, but ALPHA(j) and BETA(j) should be */ +/* > correct for j=INFO+1,...,N. */ +/* > > N: =N+1: other then QZ iteration failed in DHGEQZ, */ +/* > =N+2: error return from DTGEVC. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup complex16GEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int zggev_(char *jobvl, char *jobvr, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *alpha, doublecomplex *beta, doublecomplex *vl, integer + *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, integer + *lwork, doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, + vr_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1; + + /* Local variables */ + doublereal anrm, bnrm; + integer ierr, itau; + doublereal temp; + logical ilvl, ilvr; + integer iwrk; + extern logical lsame_(char *, char *); + integer ileft, icols, irwrk, irows; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + integer jc, in; + extern doublereal dlamch_(char *); + integer jr; + extern /* Subroutine */ int zggbak_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublecomplex *, + integer *, integer *), zggbal_(char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + , integer *, doublereal *, doublereal *, doublereal *, integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + logical ldumma[1]; + char chtemp[1]; + doublereal bignum; + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + integer ijobvl, iright; + extern /* Subroutine */ int zgghrd_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ), zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex *, + integer *, integer *); + integer ijobvr; + extern /* Subroutine */ int zgeqrf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ); + doublereal anrmto; + integer lwkmin; + doublereal bnrmto; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *), ztgevc_( + char *, char *, logical *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, integer *, doublecomplex *, + doublereal *, integer *), zhgeqz_(char *, char *, + char *, integer *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, doublereal *, integer *); + doublereal smlnum; + integer lwkopt; + logical lquery; + extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *), zunmqr_(char *, char *, integer *, integer + *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + integer ihi, ilo; + doublereal eps; + logical ilv; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alpha; + --beta; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1 * 1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1 * 1; + vr -= vr_offset; + --work; + --rwork; + + /* Function Body */ + if (lsame_(jobvl, "N")) { + ijobvl = 1; + ilvl = FALSE_; + } else if (lsame_(jobvl, "V")) { + ijobvl = 2; + ilvl = TRUE_; + } else { + ijobvl = -1; + ilvl = FALSE_; + } + + if (lsame_(jobvr, "N")) { + ijobvr = 1; + ilvr = FALSE_; + } else if (lsame_(jobvr, "V")) { + ijobvr = 2; + ilvr = TRUE_; + } else { + ijobvr = -1; + ilvr = FALSE_; + } + ilv = ilvl || ilvr; + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } else if (*ldvl < 1 || ilvl && *ldvl < *n) { + *info = -11; + } else if (*ldvr < 1 || ilvr && *ldvr < *n) { + *info = -13; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. The workspace is */ +/* computed assuming ILO = 1 and IHI = N, the worst case.) */ + + if (*info == 0) { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + lwkmin = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", n, &c__1, n, + &c__0, (ftnlen)6, (ftnlen)1); + lwkopt = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + *n * ilaenv_(&c__1, "ZUNMQR", " ", n, & + c__1, n, &c__0, (ftnlen)6, (ftnlen)1); + lwkopt = f2cmax(i__1,i__2); + if (ilvl) { +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + *n * ilaenv_(&c__1, "ZUNGQR", " ", n, & + c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = f2cmax(i__1,i__2); + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + if (*lwork < lwkmin && ! lquery) { + *info = -15; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGGEV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = dlamch_("E") * dlamch_("B"); + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = zlange_("M", n, n, &a[a_offset], lda, &rwork[1]); + ilascl = FALSE_; + if (anrm > 0. && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + zlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = zlange_("M", n, n, &b[b_offset], ldb, &rwork[1]); + ilbscl = FALSE_; + if (bnrm > 0. && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + zlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrices A, B to isolate eigenvalues if possible */ +/* (Real Workspace: need 6*N) */ + + ileft = 1; + iright = *n + 1; + irwrk = iright + *n; + zggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &rwork[ + ileft], &rwork[iright], &rwork[irwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ +/* (Complex Workspace: need N, prefer N*NB) */ + + irows = ihi + 1 - ilo; + if (ilv) { + icols = *n + 1 - ilo; + } else { + icols = irows; + } + itau = 1; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + zgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to matrix A */ +/* (Complex Workspace: need N, prefer N*NB) */ + + i__1 = *lwork + 1 - iwrk; + zunmqr_("L", "C", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VL */ +/* (Complex Workspace: need N, prefer N*NB) */ + + if (ilvl) { + zlaset_("Full", n, n, &c_b1, &c_b2, &vl[vl_offset], ldvl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + zlacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vl[ + ilo + 1 + ilo * vl_dim1], ldvl); + } + i__1 = *lwork + 1 - iwrk; + zungqr_(&irows, &irows, &irows, &vl[ilo + ilo * vl_dim1], ldvl, &work[ + itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VR */ + + if (ilvr) { + zlaset_("Full", n, n, &c_b1, &c_b2, &vr[vr_offset], ldvr); + } + +/* Reduce to generalized Hessenberg form */ + + if (ilv) { + +/* Eigenvectors requested -- work on whole matrix. */ + + zgghrd_(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr); + } else { + zgghrd_("N", "N", &irows, &c__1, &irows, &a[ilo + ilo * a_dim1], lda, + &b[ilo + ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[ + vr_offset], ldvr, &ierr); + } + +/* Perform QZ algorithm (Compute eigenvalues, and optionally, the */ +/* Schur form and Schur vectors) */ +/* (Complex Workspace: need N) */ +/* (Real Workspace: need N) */ + + iwrk = itau; + if (ilv) { + *(unsigned char *)chtemp = 'S'; + } else { + *(unsigned char *)chtemp = 'E'; + } + i__1 = *lwork + 1 - iwrk; + zhgeqz_(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vl[vl_offset], ldvl, &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, &rwork[irwrk], &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L70; + } + +/* Compute Eigenvectors */ +/* (Real Workspace: need 2*N) */ +/* (Complex Workspace: need 2*N) */ + + if (ilv) { + if (ilvl) { + if (ilvr) { + *(unsigned char *)chtemp = 'B'; + } else { + *(unsigned char *)chtemp = 'L'; + } + } else { + *(unsigned char *)chtemp = 'R'; + } + + ztgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, + &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[ + iwrk], &rwork[irwrk], &ierr); + if (ierr != 0) { + *info = *n + 2; + goto L70; + } + +/* Undo balancing on VL and VR and normalization */ +/* (Workspace: none needed) */ + + if (ilvl) { + zggbak_("P", "L", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, + &vl[vl_offset], ldvl, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + temp = 0.; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + i__3 = jr + jc * vl_dim1; + d__3 = temp, d__4 = (d__1 = vl[i__3].r, abs(d__1)) + ( + d__2 = d_imag(&vl[jr + jc * vl_dim1]), abs(d__2)); + temp = f2cmax(d__3,d__4); +/* L10: */ + } + if (temp < smlnum) { + goto L30; + } + temp = 1. / temp; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + i__3 = jr + jc * vl_dim1; + i__4 = jr + jc * vl_dim1; + z__1.r = temp * vl[i__4].r, z__1.i = temp * vl[i__4].i; + vl[i__3].r = z__1.r, vl[i__3].i = z__1.i; +/* L20: */ + } +L30: + ; + } + } + if (ilvr) { + zggbak_("P", "R", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, + &vr[vr_offset], ldvr, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + temp = 0.; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + i__3 = jr + jc * vr_dim1; + d__3 = temp, d__4 = (d__1 = vr[i__3].r, abs(d__1)) + ( + d__2 = d_imag(&vr[jr + jc * vr_dim1]), abs(d__2)); + temp = f2cmax(d__3,d__4); +/* L40: */ + } + if (temp < smlnum) { + goto L60; + } + temp = 1. / temp; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + i__3 = jr + jc * vr_dim1; + i__4 = jr + jc * vr_dim1; + z__1.r = temp * vr[i__4].r, z__1.i = temp * vr[i__4].i; + vr[i__3].r = z__1.r, vr[i__3].i = z__1.i; +/* L50: */ + } +L60: + ; + } + } + } + +/* Undo scaling if necessary */ + +L70: + + if (ilascl) { + zlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, & + ierr); + } + + if (ilbscl) { + zlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZGGEV */ + +} /* zggev_ */ + diff --git a/lapack-netlib/SRC/zggev3.c b/lapack-netlib/SRC/zggev3.c new file mode 100644 index 000000000..7eebf8330 --- /dev/null +++ b/lapack-netlib/SRC/zggev3.c @@ -0,0 +1,1069 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGGEV3 computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE mat +rices (blocked algorithm) */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGGEV3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, */ +/* VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) */ + +/* CHARACTER JOBVL, JOBVR */ +/* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N */ +/* DOUBLE PRECISION RWORK( * ) */ +/* COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), */ +/* $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), */ +/* $ WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGGEV3 computes for a pair of N-by-N complex nonsymmetric matrices */ +/* > (A,B), the generalized eigenvalues, and optionally, the left and/or */ +/* > right generalized eigenvectors. */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar */ +/* > lambda or a ratio alpha/beta = lambda, such that A - lambda*B is */ +/* > singular. It is usually represented as the pair (alpha,beta), as */ +/* > there is a reasonable interpretation for beta=0, and even for both */ +/* > being zero. */ +/* > */ +/* > The right generalized eigenvector v(j) corresponding to the */ +/* > generalized eigenvalue lambda(j) of (A,B) satisfies */ +/* > */ +/* > A * v(j) = lambda(j) * B * v(j). */ +/* > */ +/* > The left generalized eigenvector u(j) corresponding to the */ +/* > generalized eigenvalues lambda(j) of (A,B) satisfies */ +/* > */ +/* > u(j)**H * A = lambda(j) * u(j)**H * B */ +/* > */ +/* > where u(j)**H is the conjugate-transpose of u(j). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVL */ +/* > \verbatim */ +/* > JOBVL is CHARACTER*1 */ +/* > = 'N': do not compute the left generalized eigenvectors; */ +/* > = 'V': compute the left generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVR */ +/* > \verbatim */ +/* > JOBVR is CHARACTER*1 */ +/* > = 'N': do not compute the right generalized eigenvectors; */ +/* > = 'V': compute the right generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VL, and VR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the matrix A in the pair (A,B). */ +/* > On exit, A has been overwritten. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB, N) */ +/* > On entry, the matrix B in the pair (A,B). */ +/* > On exit, B has been overwritten. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX*16 array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is COMPLEX*16 array, dimension (N) */ +/* > On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the */ +/* > generalized eigenvalues. */ +/* > */ +/* > Note: the quotients ALPHA(j)/BETA(j) may easily over- or */ +/* > underflow, and BETA(j) may even be zero. Thus, the user */ +/* > should avoid naively computing the ratio alpha/beta. */ +/* > However, ALPHA will be always less than and usually */ +/* > comparable with norm(A) in magnitude, and BETA always less */ +/* > than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VL */ +/* > \verbatim */ +/* > VL is COMPLEX*16 array, dimension (LDVL,N) */ +/* > If JOBVL = 'V', the left generalized eigenvectors u(j) are */ +/* > stored one after another in the columns of VL, in the same */ +/* > order as their eigenvalues. */ +/* > Each eigenvector is scaled so the largest component has */ +/* > abs(real part) + abs(imag. part) = 1. */ +/* > Not referenced if JOBVL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVL */ +/* > \verbatim */ +/* > LDVL is INTEGER */ +/* > The leading dimension of the matrix VL. LDVL >= 1, and */ +/* > if JOBVL = 'V', LDVL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VR */ +/* > \verbatim */ +/* > VR is COMPLEX*16 array, dimension (LDVR,N) */ +/* > If JOBVR = 'V', the right generalized eigenvectors v(j) are */ +/* > stored one after another in the columns of VR, in the same */ +/* > order as their eigenvalues. */ +/* > Each eigenvector is scaled so the largest component has */ +/* > abs(real part) + abs(imag. part) = 1. */ +/* > Not referenced if JOBVR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVR */ +/* > \verbatim */ +/* > LDVR is INTEGER */ +/* > The leading dimension of the matrix VR. LDVR >= 1, and */ +/* > if JOBVR = 'V', LDVR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (8*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > =1,...,N: */ +/* > The QZ iteration failed. No eigenvectors have been */ +/* > calculated, but ALPHA(j) and BETA(j) should be */ +/* > correct for j=INFO+1,...,N. */ +/* > > N: =N+1: other then QZ iteration failed in DHGEQZ, */ +/* > =N+2: error return from DTGEVC. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date January 2015 */ + +/* > \ingroup complex16GEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int zggev3_(char *jobvl, char *jobvr, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *alpha, doublecomplex *beta, doublecomplex *vl, integer + *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, integer + *lwork, doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, + vr_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1; + + /* Local variables */ + doublereal anrm, bnrm; + integer ierr, itau; + doublereal temp; + logical ilvl, ilvr; + integer iwrk; + extern logical lsame_(char *, char *); + integer ileft, icols, irwrk, irows; + extern /* Subroutine */ int zgghd3_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *), dlabad_( + doublereal *, doublereal *); + integer jc, in; + extern doublereal dlamch_(char *); + integer jr; + extern /* Subroutine */ int zggbak_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublecomplex *, + integer *, integer *), zggbal_(char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + , integer *, doublereal *, doublereal *, doublereal *, integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical ldumma[1]; + char chtemp[1]; + doublereal bignum; + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + integer ijobvl, iright; + extern /* Subroutine */ int zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex *, + integer *, integer *); + integer ijobvr; + extern /* Subroutine */ int zgeqrf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ); + doublereal anrmto, bnrmto; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *), ztgevc_( + char *, char *, logical *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, integer *, doublecomplex *, + doublereal *, integer *), zhgeqz_(char *, char *, + char *, integer *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, doublereal *, integer *); + doublereal smlnum; + integer lwkopt; + logical lquery; + extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *), zunmqr_(char *, char *, integer *, integer + *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + integer ihi, ilo; + doublereal eps; + logical ilv; + + +/* -- LAPACK driver routine (version 3.6.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* January 2015 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alpha; + --beta; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1 * 1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1 * 1; + vr -= vr_offset; + --work; + --rwork; + + /* Function Body */ + if (lsame_(jobvl, "N")) { + ijobvl = 1; + ilvl = FALSE_; + } else if (lsame_(jobvl, "V")) { + ijobvl = 2; + ilvl = TRUE_; + } else { + ijobvl = -1; + ilvl = FALSE_; + } + + if (lsame_(jobvr, "N")) { + ijobvr = 1; + ilvr = FALSE_; + } else if (lsame_(jobvr, "V")) { + ijobvr = 2; + ilvr = TRUE_; + } else { + ijobvr = -1; + ilvr = FALSE_; + } + ilv = ilvl || ilvr; + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } else if (*ldvl < 1 || ilvl && *ldvl < *n) { + *info = -11; + } else if (*ldvr < 1 || ilvr && *ldvr < *n) { + *info = -13; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -15; + } + } + +/* Compute workspace */ + + if (*info == 0) { + zgeqrf_(n, n, &b[b_offset], ldb, &work[1], &work[1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = 1, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + zunmqr_("L", "C", n, n, n, &b[b_offset], ldb, &work[1], &a[a_offset], + lda, &work[1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + if (ilvl) { + zungqr_(n, n, n, &vl[vl_offset], ldvl, &work[1], &work[1], &c_n1, + &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + } + if (ilv) { + zgghd3_(jobvl, jobvr, n, &c__1, n, &a[a_offset], lda, &b[b_offset] + , ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &work[ + 1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + zhgeqz_("S", jobvl, jobvr, n, &c__1, n, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, &work[1], &c_n1, &rwork[1], &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + } else { + zgghd3_(jobvl, jobvr, n, &c__1, n, &a[a_offset], lda, &b[b_offset] + , ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &work[ + 1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + zhgeqz_("E", jobvl, jobvr, n, &c__1, n, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, &work[1], &c_n1, &rwork[1], &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + } + z__1.r = (doublereal) lwkopt, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGGEV3 ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = dlamch_("E") * dlamch_("B"); + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = zlange_("M", n, n, &a[a_offset], lda, &rwork[1]); + ilascl = FALSE_; + if (anrm > 0. && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + zlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = zlange_("M", n, n, &b[b_offset], ldb, &rwork[1]); + ilbscl = FALSE_; + if (bnrm > 0. && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + zlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrices A, B to isolate eigenvalues if possible */ + + ileft = 1; + iright = *n + 1; + irwrk = iright + *n; + zggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &rwork[ + ileft], &rwork[iright], &rwork[irwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ + + irows = ihi + 1 - ilo; + if (ilv) { + icols = *n + 1 - ilo; + } else { + icols = irows; + } + itau = 1; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + zgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to matrix A */ + + i__1 = *lwork + 1 - iwrk; + zunmqr_("L", "C", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VL */ + + if (ilvl) { + zlaset_("Full", n, n, &c_b1, &c_b2, &vl[vl_offset], ldvl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + zlacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vl[ + ilo + 1 + ilo * vl_dim1], ldvl); + } + i__1 = *lwork + 1 - iwrk; + zungqr_(&irows, &irows, &irows, &vl[ilo + ilo * vl_dim1], ldvl, &work[ + itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VR */ + + if (ilvr) { + zlaset_("Full", n, n, &c_b1, &c_b2, &vr[vr_offset], ldvr); + } + +/* Reduce to generalized Hessenberg form */ + + if (ilv) { + +/* Eigenvectors requested -- work on whole matrix. */ + + i__1 = *lwork + 1 - iwrk; + zgghd3_(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &work[iwrk], + &i__1, &ierr); + } else { + i__1 = *lwork + 1 - iwrk; + zgghd3_("N", "N", &irows, &c__1, &irows, &a[ilo + ilo * a_dim1], lda, + &b[ilo + ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, &ierr); + } + +/* Perform QZ algorithm (Compute eigenvalues, and optionally, the */ +/* Schur form and Schur vectors) */ + + iwrk = itau; + if (ilv) { + *(unsigned char *)chtemp = 'S'; + } else { + *(unsigned char *)chtemp = 'E'; + } + i__1 = *lwork + 1 - iwrk; + zhgeqz_(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vl[vl_offset], ldvl, &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, &rwork[irwrk], &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L70; + } + +/* Compute Eigenvectors */ + + if (ilv) { + if (ilvl) { + if (ilvr) { + *(unsigned char *)chtemp = 'B'; + } else { + *(unsigned char *)chtemp = 'L'; + } + } else { + *(unsigned char *)chtemp = 'R'; + } + + ztgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, + &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[ + iwrk], &rwork[irwrk], &ierr); + if (ierr != 0) { + *info = *n + 2; + goto L70; + } + +/* Undo balancing on VL and VR and normalization */ + + if (ilvl) { + zggbak_("P", "L", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, + &vl[vl_offset], ldvl, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + temp = 0.; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + i__3 = jr + jc * vl_dim1; + d__3 = temp, d__4 = (d__1 = vl[i__3].r, abs(d__1)) + ( + d__2 = d_imag(&vl[jr + jc * vl_dim1]), abs(d__2)); + temp = f2cmax(d__3,d__4); +/* L10: */ + } + if (temp < smlnum) { + goto L30; + } + temp = 1. / temp; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + i__3 = jr + jc * vl_dim1; + i__4 = jr + jc * vl_dim1; + z__1.r = temp * vl[i__4].r, z__1.i = temp * vl[i__4].i; + vl[i__3].r = z__1.r, vl[i__3].i = z__1.i; +/* L20: */ + } +L30: + ; + } + } + if (ilvr) { + zggbak_("P", "R", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, + &vr[vr_offset], ldvr, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + temp = 0.; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + i__3 = jr + jc * vr_dim1; + d__3 = temp, d__4 = (d__1 = vr[i__3].r, abs(d__1)) + ( + d__2 = d_imag(&vr[jr + jc * vr_dim1]), abs(d__2)); + temp = f2cmax(d__3,d__4); +/* L40: */ + } + if (temp < smlnum) { + goto L60; + } + temp = 1. / temp; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + i__3 = jr + jc * vr_dim1; + i__4 = jr + jc * vr_dim1; + z__1.r = temp * vr[i__4].r, z__1.i = temp * vr[i__4].i; + vr[i__3].r = z__1.r, vr[i__3].i = z__1.i; +/* L50: */ + } +L60: + ; + } + } + } + +/* Undo scaling if necessary */ + +L70: + + if (ilascl) { + zlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, & + ierr); + } + + if (ilbscl) { + zlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + z__1.r = (doublereal) lwkopt, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; + +/* End of ZGGEV3 */ + +} /* zggev3_ */ + diff --git a/lapack-netlib/SRC/zggevx.c b/lapack-netlib/SRC/zggevx.c new file mode 100644 index 000000000..f1b05372a --- /dev/null +++ b/lapack-netlib/SRC/zggevx.c @@ -0,0 +1,1315 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGGEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE mat +rices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGGEVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB, */ +/* ALPHA, BETA, VL, LDVL, VR, LDVR, ILO, IHI, */ +/* LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, RCONDV, */ +/* WORK, LWORK, RWORK, IWORK, BWORK, INFO ) */ + +/* CHARACTER BALANC, JOBVL, JOBVR, SENSE */ +/* INTEGER IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N */ +/* DOUBLE PRECISION ABNRM, BBNRM */ +/* LOGICAL BWORK( * ) */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION LSCALE( * ), RCONDE( * ), RCONDV( * ), */ +/* $ RSCALE( * ), RWORK( * ) */ +/* COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), */ +/* $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), */ +/* $ WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGGEVX computes for a pair of N-by-N complex nonsymmetric matrices */ +/* > (A,B) the generalized eigenvalues, and optionally, the left and/or */ +/* > right generalized eigenvectors. */ +/* > */ +/* > Optionally, it also computes a balancing transformation to improve */ +/* > the conditioning of the eigenvalues and eigenvectors (ILO, IHI, */ +/* > LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for */ +/* > the eigenvalues (RCONDE), and reciprocal condition numbers for the */ +/* > right eigenvectors (RCONDV). */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar */ +/* > lambda or a ratio alpha/beta = lambda, such that A - lambda*B is */ +/* > singular. It is usually represented as the pair (alpha,beta), as */ +/* > there is a reasonable interpretation for beta=0, and even for both */ +/* > being zero. */ +/* > */ +/* > The right eigenvector v(j) corresponding to the eigenvalue lambda(j) */ +/* > of (A,B) satisfies */ +/* > A * v(j) = lambda(j) * B * v(j) . */ +/* > The left eigenvector u(j) corresponding to the eigenvalue lambda(j) */ +/* > of (A,B) satisfies */ +/* > u(j)**H * A = lambda(j) * u(j)**H * B. */ +/* > where u(j)**H is the conjugate-transpose of u(j). */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] BALANC */ +/* > \verbatim */ +/* > BALANC is CHARACTER*1 */ +/* > Specifies the balance option to be performed: */ +/* > = 'N': do not diagonally scale or permute; */ +/* > = 'P': permute only; */ +/* > = 'S': scale only; */ +/* > = 'B': both permute and scale. */ +/* > Computed reciprocal condition numbers will be for the */ +/* > matrices after permuting and/or balancing. Permuting does */ +/* > not change condition numbers (in exact arithmetic), but */ +/* > balancing does. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVL */ +/* > \verbatim */ +/* > JOBVL is CHARACTER*1 */ +/* > = 'N': do not compute the left generalized eigenvectors; */ +/* > = 'V': compute the left generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVR */ +/* > \verbatim */ +/* > JOBVR is CHARACTER*1 */ +/* > = 'N': do not compute the right generalized eigenvectors; */ +/* > = 'V': compute the right generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SENSE */ +/* > \verbatim */ +/* > SENSE is CHARACTER*1 */ +/* > Determines which reciprocal condition numbers are computed. */ +/* > = 'N': none are computed; */ +/* > = 'E': computed for eigenvalues only; */ +/* > = 'V': computed for eigenvectors only; */ +/* > = 'B': computed for eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VL, and VR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the matrix A in the pair (A,B). */ +/* > On exit, A has been overwritten. If JOBVL='V' or JOBVR='V' */ +/* > or both, then A contains the first part of the complex Schur */ +/* > form of the "balanced" versions of the input A and B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB, N) */ +/* > On entry, the matrix B in the pair (A,B). */ +/* > On exit, B has been overwritten. If JOBVL='V' or JOBVR='V' */ +/* > or both, then B contains the second part of the complex */ +/* > Schur form of the "balanced" versions of the input A and B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX*16 array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is COMPLEX*16 array, dimension (N) */ +/* > On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the generalized */ +/* > eigenvalues. */ +/* > */ +/* > Note: the quotient ALPHA(j)/BETA(j) ) may easily over- or */ +/* > underflow, and BETA(j) may even be zero. Thus, the user */ +/* > should avoid naively computing the ratio ALPHA/BETA. */ +/* > However, ALPHA will be always less than and usually */ +/* > comparable with norm(A) in magnitude, and BETA always less */ +/* > than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VL */ +/* > \verbatim */ +/* > VL is COMPLEX*16 array, dimension (LDVL,N) */ +/* > If JOBVL = 'V', the left generalized eigenvectors u(j) are */ +/* > stored one after another in the columns of VL, in the same */ +/* > order as their eigenvalues. */ +/* > Each eigenvector will be scaled so the largest component */ +/* > will have abs(real part) + abs(imag. part) = 1. */ +/* > Not referenced if JOBVL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVL */ +/* > \verbatim */ +/* > LDVL is INTEGER */ +/* > The leading dimension of the matrix VL. LDVL >= 1, and */ +/* > if JOBVL = 'V', LDVL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VR */ +/* > \verbatim */ +/* > VR is COMPLEX*16 array, dimension (LDVR,N) */ +/* > If JOBVR = 'V', the right generalized eigenvectors v(j) are */ +/* > stored one after another in the columns of VR, in the same */ +/* > order as their eigenvalues. */ +/* > Each eigenvector will be scaled so the largest component */ +/* > will have abs(real part) + abs(imag. part) = 1. */ +/* > Not referenced if JOBVR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVR */ +/* > \verbatim */ +/* > LDVR is INTEGER */ +/* > The leading dimension of the matrix VR. LDVR >= 1, and */ +/* > if JOBVR = 'V', LDVR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > ILO and IHI are integer values such that on exit */ +/* > A(i,j) = 0 and B(i,j) = 0 if i > j and */ +/* > j = 1,...,ILO-1 or i = IHI+1,...,N. */ +/* > If BALANC = 'N' or 'S', ILO = 1 and IHI = N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] LSCALE */ +/* > \verbatim */ +/* > LSCALE is DOUBLE PRECISION array, dimension (N) */ +/* > Details of the permutations and scaling factors applied */ +/* > to the left side of A and B. If PL(j) is the index of the */ +/* > row interchanged with row j, and DL(j) is the scaling */ +/* > factor applied to row j, then */ +/* > LSCALE(j) = PL(j) for j = 1,...,ILO-1 */ +/* > = DL(j) for j = ILO,...,IHI */ +/* > = PL(j) for j = IHI+1,...,N. */ +/* > The order in which the interchanges are made is N to IHI+1, */ +/* > then 1 to ILO-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RSCALE */ +/* > \verbatim */ +/* > RSCALE is DOUBLE PRECISION array, dimension (N) */ +/* > Details of the permutations and scaling factors applied */ +/* > to the right side of A and B. If PR(j) is the index of the */ +/* > column interchanged with column j, and DR(j) is the scaling */ +/* > factor applied to column j, then */ +/* > RSCALE(j) = PR(j) for j = 1,...,ILO-1 */ +/* > = DR(j) for j = ILO,...,IHI */ +/* > = PR(j) for j = IHI+1,...,N */ +/* > The order in which the interchanges are made is N to IHI+1, */ +/* > then 1 to ILO-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ABNRM */ +/* > \verbatim */ +/* > ABNRM is DOUBLE PRECISION */ +/* > The one-norm of the balanced matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BBNRM */ +/* > \verbatim */ +/* > BBNRM is DOUBLE PRECISION */ +/* > The one-norm of the balanced matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDE */ +/* > \verbatim */ +/* > RCONDE is DOUBLE PRECISION array, dimension (N) */ +/* > If SENSE = 'E' or 'B', the reciprocal condition numbers of */ +/* > the eigenvalues, stored in consecutive elements of the array. */ +/* > If SENSE = 'N' or 'V', RCONDE is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDV */ +/* > \verbatim */ +/* > RCONDV is DOUBLE PRECISION array, dimension (N) */ +/* > If JOB = 'V' or 'B', the estimated reciprocal condition */ +/* > numbers of the eigenvectors, stored in consecutive elements */ +/* > of the array. If the eigenvalues cannot be reordered to */ +/* > compute RCONDV(j), RCONDV(j) is set to 0; this can only occur */ +/* > when the true value would be very small anyway. */ +/* > If SENSE = 'N' or 'E', RCONDV is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,2*N). */ +/* > If SENSE = 'E', LWORK >= f2cmax(1,4*N). */ +/* > If SENSE = 'V' or 'B', LWORK >= f2cmax(1,2*N*N+2*N). */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (lrwork) */ +/* > lrwork must be at least f2cmax(1,6*N) if BALANC = 'S' or 'B', */ +/* > and at least f2cmax(1,2*N) otherwise. */ +/* > Real workspace. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N+2) */ +/* > If SENSE = 'E', IWORK is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BWORK */ +/* > \verbatim */ +/* > BWORK is LOGICAL array, dimension (N) */ +/* > If SENSE = 'N', BWORK is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > = 1,...,N: */ +/* > The QZ iteration failed. No eigenvectors have been */ +/* > calculated, but ALPHA(j) and BETA(j) should be correct */ +/* > for j=INFO+1,...,N. */ +/* > > N: =N+1: other than QZ iteration failed in ZHGEQZ. */ +/* > =N+2: error return from ZTGEVC. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup complex16GEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Balancing a matrix pair (A,B) includes, first, permuting rows and */ +/* > columns to isolate eigenvalues, second, applying diagonal similarity */ +/* > transformation to the rows and columns to make the rows and columns */ +/* > as close in norm as possible. The computed reciprocal condition */ +/* > numbers correspond to the balanced matrix. Permuting rows and columns */ +/* > will not change the condition numbers (in exact arithmetic) but */ +/* > diagonal scaling will. For further explanation of balancing, see */ +/* > section 4.11.1.2 of LAPACK Users' Guide. */ +/* > */ +/* > An approximate error bound on the chordal distance between the i-th */ +/* > computed generalized eigenvalue w and the corresponding exact */ +/* > eigenvalue lambda is */ +/* > */ +/* > chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I) */ +/* > */ +/* > An approximate error bound for the angle between the i-th computed */ +/* > eigenvector VL(i) or VR(i) is given by */ +/* > */ +/* > EPS * norm(ABNRM, BBNRM) / DIF(i). */ +/* > */ +/* > For further explanation of the reciprocal condition numbers RCONDE */ +/* > and RCONDV, see section 4.11 of LAPACK User's Guide. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zggevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, doublecomplex *alpha, doublecomplex *beta, + doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, + integer *ilo, integer *ihi, doublereal *lscale, doublereal *rscale, + doublereal *abnrm, doublereal *bbnrm, doublereal *rconde, doublereal * + rcondv, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *iwork, logical *bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, + vr_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1; + + /* Local variables */ + doublereal anrm, bnrm; + integer ierr, itau; + doublereal temp; + logical ilvl, ilvr; + integer iwrk, iwrk1, i__, j, m; + extern logical lsame_(char *, char *); + integer icols; + logical noscl; + integer irows; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + integer jc, in; + extern doublereal dlamch_(char *); + integer jr; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), zggbak_(char *, char *, integer *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublecomplex *, integer *, integer *), zggbal_( + char *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + logical ldumma[1]; + char chtemp[1]; + doublereal bignum; + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + integer ijobvl; + extern /* Subroutine */ int zgghrd_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ), zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex *, + integer *, integer *); + integer ijobvr; + logical wantsb; + extern /* Subroutine */ int zgeqrf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ); + doublereal anrmto; + logical wantse; + doublereal bnrmto; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *), ztgevc_( + char *, char *, logical *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, integer *, doublecomplex *, + doublereal *, integer *), ztgsna_(char *, char *, + logical *, integer *, doublecomplex *, integer *, doublecomplex * + , integer *, doublecomplex *, integer *, doublecomplex *, integer + *, doublereal *, doublereal *, integer *, integer *, + doublecomplex *, integer *, integer *, integer *); + integer minwrk; + extern /* Subroutine */ int zhgeqz_(char *, char *, char *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, integer *); + integer maxwrk; + logical wantsn; + doublereal smlnum; + logical lquery, wantsv; + extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *), zunmqr_(char *, char *, integer *, integer + *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + doublereal eps; + logical ilv; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alpha; + --beta; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1 * 1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1 * 1; + vr -= vr_offset; + --lscale; + --rscale; + --rconde; + --rcondv; + --work; + --rwork; + --iwork; + --bwork; + + /* Function Body */ + if (lsame_(jobvl, "N")) { + ijobvl = 1; + ilvl = FALSE_; + } else if (lsame_(jobvl, "V")) { + ijobvl = 2; + ilvl = TRUE_; + } else { + ijobvl = -1; + ilvl = FALSE_; + } + + if (lsame_(jobvr, "N")) { + ijobvr = 1; + ilvr = FALSE_; + } else if (lsame_(jobvr, "V")) { + ijobvr = 2; + ilvr = TRUE_; + } else { + ijobvr = -1; + ilvr = FALSE_; + } + ilv = ilvl || ilvr; + + noscl = lsame_(balanc, "N") || lsame_(balanc, "P"); + wantsn = lsame_(sense, "N"); + wantse = lsame_(sense, "E"); + wantsv = lsame_(sense, "V"); + wantsb = lsame_(sense, "B"); + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (! (noscl || lsame_(balanc, "S") || lsame_( + balanc, "B"))) { + *info = -1; + } else if (ijobvl <= 0) { + *info = -2; + } else if (ijobvr <= 0) { + *info = -3; + } else if (! (wantsn || wantse || wantsb || wantsv)) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldvl < 1 || ilvl && *ldvl < *n) { + *info = -13; + } else if (*ldvr < 1 || ilvr && *ldvr < *n) { + *info = -15; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. The workspace is */ +/* computed assuming ILO = 1 and IHI = N, the worst case.) */ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + minwrk = *n << 1; + if (wantse) { + minwrk = *n << 2; + } else if (wantsv || wantsb) { + minwrk = (*n << 1) * (*n + 1); + } + maxwrk = minwrk; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", n, & + c__1, n, &c__0, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "ZUNMQR", " ", n, & + c__1, n, &c__0, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); + if (ilvl) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "ZUNGQR", + " ", n, &c__1, n, &c__0, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); + } + } + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + + if (*lwork < minwrk && ! lquery) { + *info = -25; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGGEVX", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = dlamch_("P"); + smlnum = dlamch_("S"); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = zlange_("M", n, n, &a[a_offset], lda, &rwork[1]); + ilascl = FALSE_; + if (anrm > 0. && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + zlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = zlange_("M", n, n, &b[b_offset], ldb, &rwork[1]); + ilbscl = FALSE_; + if (bnrm > 0. && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + zlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute and/or balance the matrix pair (A,B) */ +/* (Real Workspace: need 6*N if BALANC = 'S' or 'B', 1 otherwise) */ + + zggbal_(balanc, n, &a[a_offset], lda, &b[b_offset], ldb, ilo, ihi, & + lscale[1], &rscale[1], &rwork[1], &ierr); + +/* Compute ABNRM and BBNRM */ + + *abnrm = zlange_("1", n, n, &a[a_offset], lda, &rwork[1]); + if (ilascl) { + rwork[1] = *abnrm; + dlascl_("G", &c__0, &c__0, &anrmto, &anrm, &c__1, &c__1, &rwork[1], & + c__1, &ierr); + *abnrm = rwork[1]; + } + + *bbnrm = zlange_("1", n, n, &b[b_offset], ldb, &rwork[1]); + if (ilbscl) { + rwork[1] = *bbnrm; + dlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, &c__1, &c__1, &rwork[1], & + c__1, &ierr); + *bbnrm = rwork[1]; + } + +/* Reduce B to triangular form (QR decomposition of B) */ +/* (Complex Workspace: need N, prefer N*NB ) */ + + irows = *ihi + 1 - *ilo; + if (ilv || ! wantsn) { + icols = *n + 1 - *ilo; + } else { + icols = irows; + } + itau = 1; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + zgeqrf_(&irows, &icols, &b[*ilo + *ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the unitary transformation to A */ +/* (Complex Workspace: need N, prefer N*NB) */ + + i__1 = *lwork + 1 - iwrk; + zunmqr_("L", "C", &irows, &icols, &irows, &b[*ilo + *ilo * b_dim1], ldb, & + work[itau], &a[*ilo + *ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VL and/or VR */ +/* (Workspace: need N, prefer N*NB) */ + + if (ilvl) { + zlaset_("Full", n, n, &c_b1, &c_b2, &vl[vl_offset], ldvl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + zlacpy_("L", &i__1, &i__2, &b[*ilo + 1 + *ilo * b_dim1], ldb, &vl[ + *ilo + 1 + *ilo * vl_dim1], ldvl); + } + i__1 = *lwork + 1 - iwrk; + zungqr_(&irows, &irows, &irows, &vl[*ilo + *ilo * vl_dim1], ldvl, & + work[itau], &work[iwrk], &i__1, &ierr); + } + + if (ilvr) { + zlaset_("Full", n, n, &c_b1, &c_b2, &vr[vr_offset], ldvr); + } + +/* Reduce to generalized Hessenberg form */ +/* (Workspace: none needed) */ + + if (ilv || ! wantsn) { + +/* Eigenvectors requested -- work on whole matrix. */ + + zgghrd_(jobvl, jobvr, n, ilo, ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr); + } else { + zgghrd_("N", "N", &irows, &c__1, &irows, &a[*ilo + *ilo * a_dim1], + lda, &b[*ilo + *ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[ + vr_offset], ldvr, &ierr); + } + +/* Perform QZ algorithm (Compute eigenvalues, and optionally, the */ +/* Schur forms and Schur vectors) */ +/* (Complex Workspace: need N) */ +/* (Real Workspace: need N) */ + + iwrk = itau; + if (ilv || ! wantsn) { + *(unsigned char *)chtemp = 'S'; + } else { + *(unsigned char *)chtemp = 'E'; + } + + i__1 = *lwork + 1 - iwrk; + zhgeqz_(chtemp, jobvl, jobvr, n, ilo, ihi, &a[a_offset], lda, &b[b_offset] + , ldb, &alpha[1], &beta[1], &vl[vl_offset], ldvl, &vr[vr_offset], + ldvr, &work[iwrk], &i__1, &rwork[1], &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L90; + } + +/* Compute Eigenvectors and estimate condition numbers if desired */ +/* ZTGEVC: (Complex Workspace: need 2*N ) */ +/* (Real Workspace: need 2*N ) */ +/* ZTGSNA: (Complex Workspace: need 2*N*N if SENSE='V' or 'B') */ +/* (Integer Workspace: need N+2 ) */ + + if (ilv || ! wantsn) { + if (ilv) { + if (ilvl) { + if (ilvr) { + *(unsigned char *)chtemp = 'B'; + } else { + *(unsigned char *)chtemp = 'L'; + } + } else { + *(unsigned char *)chtemp = 'R'; + } + + ztgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, & + work[iwrk], &rwork[1], &ierr); + if (ierr != 0) { + *info = *n + 2; + goto L90; + } + } + + if (! wantsn) { + +/* compute eigenvectors (DTGEVC) and estimate condition */ +/* numbers (DTGSNA). Note that the definition of the condition */ +/* number is not invariant under transformation (u,v) to */ +/* (Q*u, Z*v), where (u,v) are eigenvectors of the generalized */ +/* Schur form (S,T), Q and Z are orthogonal matrices. In order */ +/* to avoid using extra 2*N*N workspace, we have to */ +/* re-calculate eigenvectors and estimate the condition numbers */ +/* one at a time. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + bwork[j] = FALSE_; +/* L10: */ + } + bwork[i__] = TRUE_; + + iwrk = *n + 1; + iwrk1 = iwrk + *n; + + if (wantse || wantsb) { + ztgevc_("B", "S", &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &work[1], n, &work[iwrk], n, & + c__1, &m, &work[iwrk1], &rwork[1], &ierr); + if (ierr != 0) { + *info = *n + 2; + goto L90; + } + } + + i__2 = *lwork - iwrk1 + 1; + ztgsna_(sense, "S", &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &work[1], n, &work[iwrk], n, &rconde[ + i__], &rcondv[i__], &c__1, &m, &work[iwrk1], &i__2, & + iwork[1], &ierr); + +/* L20: */ + } + } + } + +/* Undo balancing on VL and VR and normalization */ +/* (Workspace: none needed) */ + + if (ilvl) { + zggbak_(balanc, "L", n, ilo, ihi, &lscale[1], &rscale[1], n, &vl[ + vl_offset], ldvl, &ierr); + + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + temp = 0.; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + i__3 = jr + jc * vl_dim1; + d__3 = temp, d__4 = (d__1 = vl[i__3].r, abs(d__1)) + (d__2 = + d_imag(&vl[jr + jc * vl_dim1]), abs(d__2)); + temp = f2cmax(d__3,d__4); +/* L30: */ + } + if (temp < smlnum) { + goto L50; + } + temp = 1. / temp; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + i__3 = jr + jc * vl_dim1; + i__4 = jr + jc * vl_dim1; + z__1.r = temp * vl[i__4].r, z__1.i = temp * vl[i__4].i; + vl[i__3].r = z__1.r, vl[i__3].i = z__1.i; +/* L40: */ + } +L50: + ; + } + } + + if (ilvr) { + zggbak_(balanc, "R", n, ilo, ihi, &lscale[1], &rscale[1], n, &vr[ + vr_offset], ldvr, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + temp = 0.; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + i__3 = jr + jc * vr_dim1; + d__3 = temp, d__4 = (d__1 = vr[i__3].r, abs(d__1)) + (d__2 = + d_imag(&vr[jr + jc * vr_dim1]), abs(d__2)); + temp = f2cmax(d__3,d__4); +/* L60: */ + } + if (temp < smlnum) { + goto L80; + } + temp = 1. / temp; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + i__3 = jr + jc * vr_dim1; + i__4 = jr + jc * vr_dim1; + z__1.r = temp * vr[i__4].r, z__1.i = temp * vr[i__4].i; + vr[i__3].r = z__1.r, vr[i__3].i = z__1.i; +/* L70: */ + } +L80: + ; + } + } + +/* Undo scaling if necessary */ + +L90: + + if (ilascl) { + zlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, & + ierr); + } + + if (ilbscl) { + zlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + return 0; + +/* End of ZGGEVX */ + +} /* zggevx_ */ + diff --git a/lapack-netlib/SRC/zggglm.c b/lapack-netlib/SRC/zggglm.c new file mode 100644 index 000000000..d776caf9b --- /dev/null +++ b/lapack-netlib/SRC/zggglm.c @@ -0,0 +1,801 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGGGLM */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGGGLM + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, P */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), */ +/* $ X( * ), Y( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGGGLM solves a general Gauss-Markov linear model (GLM) problem: */ +/* > */ +/* > minimize || y ||_2 subject to d = A*x + B*y */ +/* > x */ +/* > */ +/* > where A is an N-by-M matrix, B is an N-by-P matrix, and d is a */ +/* > given N-vector. It is assumed that M <= N <= M+P, and */ +/* > */ +/* > rank(A) = M and rank( A B ) = N. */ +/* > */ +/* > Under these assumptions, the constrained equation is always */ +/* > consistent, and there is a unique solution x and a minimal 2-norm */ +/* > solution y, which is obtained using a generalized QR factorization */ +/* > of the matrices (A, B) given by */ +/* > */ +/* > A = Q*(R), B = Q*T*Z. */ +/* > (0) */ +/* > */ +/* > In particular, if matrix B is square nonsingular, then the problem */ +/* > GLM is equivalent to the following weighted linear least squares */ +/* > problem */ +/* > */ +/* > minimize || inv(B)*(d-A*x) ||_2 */ +/* > x */ +/* > */ +/* > where inv(B) denotes the inverse of B. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of rows of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of columns of the matrix A. 0 <= M <= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of columns of the matrix B. P >= N-M. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,M) */ +/* > On entry, the N-by-M matrix A. */ +/* > On exit, the upper triangular part of the array A contains */ +/* > the M-by-M upper triangular matrix R. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,P) */ +/* > On entry, the N-by-P matrix B. */ +/* > On exit, if N <= P, the upper triangle of the subarray */ +/* > B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */ +/* > if N > P, the elements on and above the (N-P)th subdiagonal */ +/* > contain the N-by-P upper trapezoidal matrix T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is COMPLEX*16 array, dimension (N) */ +/* > On entry, D is the left hand side of the GLM equation. */ +/* > On exit, D is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension (M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Y */ +/* > \verbatim */ +/* > Y is COMPLEX*16 array, dimension (P) */ +/* > */ +/* > On exit, X and Y are the solutions of the GLM problem. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N+M+P). */ +/* > For optimum performance, LWORK >= M+f2cmin(N,P)+f2cmax(N,P)*NB, */ +/* > where NB is an upper bound for the optimal blocksizes for */ +/* > ZGEQRF, ZGERQF, ZUNMQR and ZUNMRQ. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > = 1: the upper triangular factor R associated with A in the */ +/* > generalized QR factorization of the pair (A, B) is */ +/* > singular, so that rank(A) < M; the least squares */ +/* > solution could not be computed. */ +/* > = 2: the bottom (N-M) by (N-M) part of the upper trapezoidal */ +/* > factor T associated with B in the generalized QR */ +/* > factorization of the pair (A, B) is singular, so that */ +/* > rank( A B ) < N; the least squares solution could not */ +/* > be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16OTHEReigen */ + +/* ===================================================================== */ +/* Subroutine */ int zggglm_(integer *n, integer *m, integer *p, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *d__, doublecomplex *x, doublecomplex *y, doublecomplex + *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1; + + /* Local variables */ + integer lopt, i__; + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), + zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, + integer *); + integer nb, np; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zggqrf_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer *) + ; + integer lwkmin, nb1, nb2, nb3, nb4, lwkopt; + logical lquery; + extern /* Subroutine */ int zunmqr_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmrq_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *), ztrtrs_(char *, char *, char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* =================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --d__; + --x; + --y; + --work; + + /* Function Body */ + *info = 0; + np = f2cmin(*n,*p); + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*m < 0 || *m > *n) { + *info = -2; + } else if (*p < 0 || *p < *n - *m) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } + +/* Calculate workspace */ + + if (*info == 0) { + if (*n == 0) { + lwkmin = 1; + lwkopt = 1; + } else { + nb1 = ilaenv_(&c__1, "ZGEQRF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nb2 = ilaenv_(&c__1, "ZGERQF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nb3 = ilaenv_(&c__1, "ZUNMQR", " ", n, m, p, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb4 = ilaenv_(&c__1, "ZUNMRQ", " ", n, m, p, &c_n1, (ftnlen)6, ( + ftnlen)1); +/* Computing MAX */ + i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3); + nb = f2cmax(i__1,nb4); + lwkmin = *m + *n + *p; + lwkopt = *m + np + f2cmax(*n,*p) * nb; + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + if (*lwork < lwkmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGGGLM", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + x[i__2].r = 0., x[i__2].i = 0.; + } + i__1 = *p; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0., y[i__2].i = 0.; + } + return 0; + } + +/* Compute the GQR factorization of matrices A and B: */ + +/* Q**H*A = ( R11 ) M, Q**H*B*Z**H = ( T11 T12 ) M */ +/* ( 0 ) N-M ( 0 T22 ) N-M */ +/* M M+P-N N-M */ + +/* where R11 and T22 are upper triangular, and Q and Z are */ +/* unitary. */ + + i__1 = *lwork - *m - np; + zggqrf_(n, m, p, &a[a_offset], lda, &work[1], &b[b_offset], ldb, &work[*m + + 1], &work[*m + np + 1], &i__1, info); + i__1 = *m + np + 1; + lopt = (integer) work[i__1].r; + +/* Update left-hand-side vector d = Q**H*d = ( d1 ) M */ +/* ( d2 ) N-M */ + + i__1 = f2cmax(1,*n); + i__2 = *lwork - *m - np; + zunmqr_("Left", "Conjugate transpose", n, &c__1, m, &a[a_offset], lda, & + work[1], &d__[1], &i__1, &work[*m + np + 1], &i__2, info); +/* Computing MAX */ + i__3 = *m + np + 1; + i__1 = lopt, i__2 = (integer) work[i__3].r; + lopt = f2cmax(i__1,i__2); + +/* Solve T22*y2 = d2 for y2 */ + + if (*n > *m) { + i__1 = *n - *m; + i__2 = *n - *m; + ztrtrs_("Upper", "No transpose", "Non unit", &i__1, &c__1, &b[*m + 1 + + (*m + *p - *n + 1) * b_dim1], ldb, &d__[*m + 1], &i__2, + info); + + if (*info > 0) { + *info = 1; + return 0; + } + + i__1 = *n - *m; + zcopy_(&i__1, &d__[*m + 1], &c__1, &y[*m + *p - *n + 1], &c__1); + } + +/* Set y1 = 0 */ + + i__1 = *m + *p - *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0., y[i__2].i = 0.; +/* L10: */ + } + +/* Update d1 = d1 - T12*y2 */ + + i__1 = *n - *m; + z__1.r = -1., z__1.i = 0.; + zgemv_("No transpose", m, &i__1, &z__1, &b[(*m + *p - *n + 1) * b_dim1 + + 1], ldb, &y[*m + *p - *n + 1], &c__1, &c_b2, &d__[1], &c__1); + +/* Solve triangular system: R11*x = d1 */ + + if (*m > 0) { + ztrtrs_("Upper", "No Transpose", "Non unit", m, &c__1, &a[a_offset], + lda, &d__[1], m, info); + + if (*info > 0) { + *info = 2; + return 0; + } + +/* Copy D to X */ + + zcopy_(m, &d__[1], &c__1, &x[1], &c__1); + } + +/* Backward transformation y = Z**H *y */ + +/* Computing MAX */ + i__1 = 1, i__2 = *n - *p + 1; + i__3 = f2cmax(1,*p); + i__4 = *lwork - *m - np; + zunmrq_("Left", "Conjugate transpose", p, &c__1, &np, &b[f2cmax(i__1,i__2) + + b_dim1], ldb, &work[*m + 1], &y[1], &i__3, &work[*m + np + 1], & + i__4, info); +/* Computing MAX */ + i__4 = *m + np + 1; + i__2 = lopt, i__3 = (integer) work[i__4].r; + i__1 = *m + np + f2cmax(i__2,i__3); + work[1].r = (doublereal) i__1, work[1].i = 0.; + + return 0; + +/* End of ZGGGLM */ + +} /* zggglm_ */ + diff --git a/lapack-netlib/SRC/zgghd3.c b/lapack-netlib/SRC/zgghd3.c new file mode 100644 index 000000000..259401a3b --- /dev/null +++ b/lapack-netlib/SRC/zgghd3.c @@ -0,0 +1,1641 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGGHD3 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGGHD3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, */ +/* LDQ, Z, LDZ, WORK, LWORK, INFO ) */ + +/* CHARACTER COMPQ, COMPZ */ +/* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */ +/* $ Z( LDZ, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGGHD3 reduces a pair of complex matrices (A,B) to generalized upper */ +/* > Hessenberg form using unitary transformations, where A is a */ +/* > general matrix and B is upper triangular. The form of the */ +/* > generalized eigenvalue problem is */ +/* > A*x = lambda*B*x, */ +/* > and B is typically made upper triangular by computing its QR */ +/* > factorization and moving the unitary matrix Q to the left side */ +/* > of the equation. */ +/* > */ +/* > This subroutine simultaneously reduces A to a Hessenberg matrix H: */ +/* > Q**H*A*Z = H */ +/* > and transforms B to another upper triangular matrix T: */ +/* > Q**H*B*Z = T */ +/* > in order to reduce the problem to its standard form */ +/* > H*y = lambda*T*y */ +/* > where y = Z**H*x. */ +/* > */ +/* > The unitary matrices Q and Z are determined as products of Givens */ +/* > rotations. They may either be formed explicitly, or they may be */ +/* > postmultiplied into input matrices Q1 and Z1, so that */ +/* > Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H */ +/* > Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H */ +/* > If Q1 is the unitary matrix from the QR factorization of B in the */ +/* > original equation A*x = lambda*B*x, then ZGGHD3 reduces the original */ +/* > problem to generalized Hessenberg form. */ +/* > */ +/* > This is a blocked variant of CGGHRD, using matrix-matrix */ +/* > multiplications for parts of the computation to enhance performance. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] COMPQ */ +/* > \verbatim */ +/* > COMPQ is CHARACTER*1 */ +/* > = 'N': do not compute Q; */ +/* > = 'I': Q is initialized to the unit matrix, and the */ +/* > unitary matrix Q is returned; */ +/* > = 'V': Q must contain a unitary matrix Q1 on entry, */ +/* > and the product Q1*Q is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] COMPZ */ +/* > \verbatim */ +/* > COMPZ is CHARACTER*1 */ +/* > = 'N': do not compute Z; */ +/* > = 'I': Z is initialized to the unit matrix, and the */ +/* > unitary matrix Z is returned; */ +/* > = 'V': Z must contain a unitary matrix Z1 on entry, */ +/* > and the product Z1*Z is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > */ +/* > ILO and IHI mark the rows and columns of A which are to be */ +/* > reduced. It is assumed that A is already upper triangular */ +/* > in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are */ +/* > normally set by a previous call to ZGGBAL; otherwise they */ +/* > should be set to 1 and N respectively. */ +/* > 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the N-by-N general matrix to be reduced. */ +/* > On exit, the upper triangle and the first subdiagonal of A */ +/* > are overwritten with the upper Hessenberg matrix H, and the */ +/* > rest is set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB, N) */ +/* > On entry, the N-by-N upper triangular matrix B. */ +/* > On exit, the upper triangular matrix T = Q**H B Z. The */ +/* > elements below the diagonal are set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX*16 array, dimension (LDQ, N) */ +/* > On entry, if COMPQ = 'V', the unitary matrix Q1, typically */ +/* > from the QR factorization of B. */ +/* > On exit, if COMPQ='I', the unitary matrix Q, and if */ +/* > COMPQ = 'V', the product Q1*Q. */ +/* > Not referenced if COMPQ='N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. */ +/* > LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, N) */ +/* > On entry, if COMPZ = 'V', the unitary matrix Z1. */ +/* > On exit, if COMPZ='I', the unitary matrix Z, and if */ +/* > COMPZ = 'V', the product Z1*Z. */ +/* > Not referenced if COMPZ='N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. */ +/* > LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (LWORK) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1. */ +/* > For optimum performance LWORK >= 6*N*NB, where NB is the */ +/* > optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date January 2015 */ + +/* > \ingroup complex16OTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > This routine reduces A to Hessenberg form and maintains B in */ +/* > using a blocked variant of Moler and Stewart's original algorithm, */ +/* > as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti */ +/* > (BIT 2008). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zgghd3_(char *compq, char *compz, integer *n, integer * + ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, doublecomplex *q, integer *ldq, doublecomplex *z__, + integer *ldz, doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, + z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9; + doublecomplex z__1, z__2, z__3, z__4; + + /* Local variables */ + logical blk22; + integer cola, jcol, ierr; + doublecomplex temp; + integer jrow, topq, ppwo; + extern /* Subroutine */ int zrot_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *); + doublecomplex temp1, temp2, temp3; + doublereal c__; + integer kacc22, i__, j, k; + doublecomplex s; + extern logical lsame_(char *, char *); + integer nbmin; + doublecomplex ctemp; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + integer nblst; + logical initq; + doublecomplex c1, c2; + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + logical wantq; + integer j0; + logical initz; + extern /* Subroutine */ int zunm22_(char *, char *, integer *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, integer *) + ; + logical wantz; + doublecomplex s1, s2; + extern /* Subroutine */ int ztrmv_(char *, char *, char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + char compq2[1], compz2[1]; + integer nb, jj, nh, nx, pw; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zgghrd_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ), zlaset_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *), zlartg_(doublecomplex *, doublecomplex *, doublereal *, + doublecomplex *, doublecomplex *), zlacpy_(char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *); + integer lwkopt; + logical lquery; + integer nnb, len, top, ppw, n2nb; + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* January 2015 */ + + + +/* ===================================================================== */ + + +/* Decode and test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "ZGGHD3", " ", n, ilo, ihi, &c_n1, (ftnlen)6, (ftnlen) + 1); +/* Computing MAX */ + i__1 = *n * 6 * nb; + lwkopt = f2cmax(i__1,1); + z__1.r = (doublereal) lwkopt, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + initq = lsame_(compq, "I"); + wantq = initq || lsame_(compq, "V"); + initz = lsame_(compz, "I"); + wantz = initz || lsame_(compz, "V"); + lquery = *lwork == -1; + + if (! lsame_(compq, "N") && ! wantq) { + *info = -1; + } else if (! lsame_(compz, "N") && ! wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1) { + *info = -4; + } else if (*ihi > *n || *ihi < *ilo - 1) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (wantq && *ldq < *n || *ldq < 1) { + *info = -11; + } else if (wantz && *ldz < *n || *ldz < 1) { + *info = -13; + } else if (*lwork < 1 && ! lquery) { + *info = -15; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGGHD3", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Initialize Q and Z if desired. */ + + if (initq) { + zlaset_("All", n, n, &c_b2, &c_b1, &q[q_offset], ldq); + } + if (initz) { + zlaset_("All", n, n, &c_b2, &c_b1, &z__[z_offset], ldz); + } + +/* Zero out lower triangle of B. */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + zlaset_("Lower", &i__1, &i__2, &c_b2, &c_b2, &b[b_dim1 + 2], ldb); + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + +/* Determine the blocksize. */ + + nbmin = ilaenv_(&c__2, "ZGGHD3", " ", n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb > 1 && nb < nh) { + +/* Determine when to use unblocked instead of blocked code. */ + +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__3, "ZGGHD3", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code. */ + + if (*lwork < lwkopt) { + +/* Not enough workspace to use optimal NB: determine the */ +/* minimum value of NB, and reduce NB or force use of */ +/* unblocked code. */ + +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "ZGGHD3", " ", n, ilo, ihi, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + if (*lwork >= *n * 6 * nbmin) { + nb = *lwork / (*n * 6); + } else { + nb = 1; + } + } + } + } + + if (nb < nbmin || nb >= nh) { + +/* Use unblocked code below */ + + jcol = *ilo; + + } else { + +/* Use blocked code */ + + kacc22 = ilaenv_(&c__16, "ZGGHD3", " ", n, ilo, ihi, &c_n1, (ftnlen)6, + (ftnlen)1); + blk22 = kacc22 == 2; + i__1 = *ihi - 2; + i__2 = nb; + for (jcol = *ilo; i__2 < 0 ? jcol >= i__1 : jcol <= i__1; jcol += + i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - jcol - 1; + nnb = f2cmin(i__3,i__4); + +/* Initialize small unitary factors that will hold the */ +/* accumulated Givens rotations in workspace. */ +/* N2NB denotes the number of 2*NNB-by-2*NNB factors */ +/* NBLST denotes the (possibly smaller) order of the last */ +/* factor. */ + + n2nb = (*ihi - jcol - 1) / nnb - 1; + nblst = *ihi - jcol - n2nb * nnb; + zlaset_("All", &nblst, &nblst, &c_b2, &c_b1, &work[1], &nblst); + pw = nblst * nblst + 1; + i__3 = n2nb; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = nnb << 1; + i__5 = nnb << 1; + i__6 = nnb << 1; + zlaset_("All", &i__4, &i__5, &c_b2, &c_b1, &work[pw], &i__6); + pw += (nnb << 2) * nnb; + } + +/* Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form. */ + + i__3 = jcol + nnb - 1; + for (j = jcol; j <= i__3; ++j) { + +/* Reduce Jth column of A. Store cosines and sines in Jth */ +/* column of A and B, respectively. */ + + i__4 = j + 2; + for (i__ = *ihi; i__ >= i__4; --i__) { + i__5 = i__ - 1 + j * a_dim1; + temp.r = a[i__5].r, temp.i = a[i__5].i; + zlartg_(&temp, &a[i__ + j * a_dim1], &c__, &s, &a[i__ - 1 + + j * a_dim1]); + i__5 = i__ + j * a_dim1; + z__1.r = c__, z__1.i = 0.; + a[i__5].r = z__1.r, a[i__5].i = z__1.i; + i__5 = i__ + j * b_dim1; + b[i__5].r = s.r, b[i__5].i = s.i; + } + +/* Accumulate Givens rotations into workspace array. */ + + ppw = (nblst + 1) * (nblst - 2) - j + jcol + 1; + len = j + 2 - jcol; + jrow = j + n2nb * nnb + 2; + i__4 = jrow; + for (i__ = *ihi; i__ >= i__4; --i__) { + i__5 = i__ + j * a_dim1; + ctemp.r = a[i__5].r, ctemp.i = a[i__5].i; + i__5 = i__ + j * b_dim1; + s.r = b[i__5].r, s.i = b[i__5].i; + i__5 = ppw + len - 1; + for (jj = ppw; jj <= i__5; ++jj) { + i__6 = jj + nblst; + temp.r = work[i__6].r, temp.i = work[i__6].i; + i__6 = jj + nblst; + z__2.r = ctemp.r * temp.r - ctemp.i * temp.i, z__2.i = + ctemp.r * temp.i + ctemp.i * temp.r; + i__7 = jj; + z__3.r = s.r * work[i__7].r - s.i * work[i__7].i, + z__3.i = s.r * work[i__7].i + s.i * work[i__7] + .r; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + work[i__6].r = z__1.r, work[i__6].i = z__1.i; + i__6 = jj; + d_cnjg(&z__3, &s); + z__2.r = z__3.r * temp.r - z__3.i * temp.i, z__2.i = + z__3.r * temp.i + z__3.i * temp.r; + i__7 = jj; + z__4.r = ctemp.r * work[i__7].r - ctemp.i * work[i__7] + .i, z__4.i = ctemp.r * work[i__7].i + ctemp.i + * work[i__7].r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + work[i__6].r = z__1.r, work[i__6].i = z__1.i; + } + ++len; + ppw = ppw - nblst - 1; + } + + ppwo = nblst * nblst + (nnb + j - jcol - 1 << 1) * nnb + nnb; + j0 = jrow - nnb; + i__4 = j + 2; + i__5 = -nnb; + for (jrow = j0; i__5 < 0 ? jrow >= i__4 : jrow <= i__4; jrow + += i__5) { + ppw = ppwo; + len = j + 2 - jcol; + i__6 = jrow; + for (i__ = jrow + nnb - 1; i__ >= i__6; --i__) { + i__7 = i__ + j * a_dim1; + ctemp.r = a[i__7].r, ctemp.i = a[i__7].i; + i__7 = i__ + j * b_dim1; + s.r = b[i__7].r, s.i = b[i__7].i; + i__7 = ppw + len - 1; + for (jj = ppw; jj <= i__7; ++jj) { + i__8 = jj + (nnb << 1); + temp.r = work[i__8].r, temp.i = work[i__8].i; + i__8 = jj + (nnb << 1); + z__2.r = ctemp.r * temp.r - ctemp.i * temp.i, + z__2.i = ctemp.r * temp.i + ctemp.i * + temp.r; + i__9 = jj; + z__3.r = s.r * work[i__9].r - s.i * work[i__9].i, + z__3.i = s.r * work[i__9].i + s.i * work[ + i__9].r; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + work[i__8].r = z__1.r, work[i__8].i = z__1.i; + i__8 = jj; + d_cnjg(&z__3, &s); + z__2.r = z__3.r * temp.r - z__3.i * temp.i, + z__2.i = z__3.r * temp.i + z__3.i * + temp.r; + i__9 = jj; + z__4.r = ctemp.r * work[i__9].r - ctemp.i * work[ + i__9].i, z__4.i = ctemp.r * work[i__9].i + + ctemp.i * work[i__9].r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + + z__4.i; + work[i__8].r = z__1.r, work[i__8].i = z__1.i; + } + ++len; + ppw = ppw - (nnb << 1) - 1; + } + ppwo += (nnb << 2) * nnb; + } + +/* TOP denotes the number of top rows in A and B that will */ +/* not be updated during the next steps. */ + + if (jcol <= 2) { + top = 0; + } else { + top = jcol; + } + +/* Propagate transformations through B and replace stored */ +/* left sines/cosines by right sines/cosines. */ + + i__5 = j + 1; + for (jj = *n; jj >= i__5; --jj) { + +/* Update JJth column of B. */ + +/* Computing MIN */ + i__4 = jj + 1; + i__6 = j + 2; + for (i__ = f2cmin(i__4,*ihi); i__ >= i__6; --i__) { + i__4 = i__ + j * a_dim1; + ctemp.r = a[i__4].r, ctemp.i = a[i__4].i; + i__4 = i__ + j * b_dim1; + s.r = b[i__4].r, s.i = b[i__4].i; + i__4 = i__ + jj * b_dim1; + temp.r = b[i__4].r, temp.i = b[i__4].i; + i__4 = i__ + jj * b_dim1; + z__2.r = ctemp.r * temp.r - ctemp.i * temp.i, z__2.i = + ctemp.r * temp.i + ctemp.i * temp.r; + d_cnjg(&z__4, &s); + i__7 = i__ - 1 + jj * b_dim1; + z__3.r = z__4.r * b[i__7].r - z__4.i * b[i__7].i, + z__3.i = z__4.r * b[i__7].i + z__4.i * b[i__7] + .r; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; + i__4 = i__ - 1 + jj * b_dim1; + z__2.r = s.r * temp.r - s.i * temp.i, z__2.i = s.r * + temp.i + s.i * temp.r; + i__7 = i__ - 1 + jj * b_dim1; + z__3.r = ctemp.r * b[i__7].r - ctemp.i * b[i__7].i, + z__3.i = ctemp.r * b[i__7].i + ctemp.i * b[ + i__7].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; + } + +/* Annihilate B( JJ+1, JJ ). */ + + if (jj < *ihi) { + i__6 = jj + 1 + (jj + 1) * b_dim1; + temp.r = b[i__6].r, temp.i = b[i__6].i; + zlartg_(&temp, &b[jj + 1 + jj * b_dim1], &c__, &s, &b[ + jj + 1 + (jj + 1) * b_dim1]); + i__6 = jj + 1 + jj * b_dim1; + b[i__6].r = 0., b[i__6].i = 0.; + i__6 = jj - top; + zrot_(&i__6, &b[top + 1 + (jj + 1) * b_dim1], &c__1, & + b[top + 1 + jj * b_dim1], &c__1, &c__, &s); + i__6 = jj + 1 + j * a_dim1; + z__1.r = c__, z__1.i = 0.; + a[i__6].r = z__1.r, a[i__6].i = z__1.i; + i__6 = jj + 1 + j * b_dim1; + d_cnjg(&z__2, &s); + z__1.r = -z__2.r, z__1.i = -z__2.i; + b[i__6].r = z__1.r, b[i__6].i = z__1.i; + } + } + +/* Update A by transformations from right. */ + + jj = (*ihi - j - 1) % 3; + i__5 = jj + 1; + for (i__ = *ihi - j - 3; i__ >= i__5; i__ += -3) { + i__6 = j + 1 + i__ + j * a_dim1; + ctemp.r = a[i__6].r, ctemp.i = a[i__6].i; + i__6 = j + 1 + i__ + j * b_dim1; + z__1.r = -b[i__6].r, z__1.i = -b[i__6].i; + s.r = z__1.r, s.i = z__1.i; + i__6 = j + 2 + i__ + j * a_dim1; + c1.r = a[i__6].r, c1.i = a[i__6].i; + i__6 = j + 2 + i__ + j * b_dim1; + z__1.r = -b[i__6].r, z__1.i = -b[i__6].i; + s1.r = z__1.r, s1.i = z__1.i; + i__6 = j + 3 + i__ + j * a_dim1; + c2.r = a[i__6].r, c2.i = a[i__6].i; + i__6 = j + 3 + i__ + j * b_dim1; + z__1.r = -b[i__6].r, z__1.i = -b[i__6].i; + s2.r = z__1.r, s2.i = z__1.i; + + i__6 = *ihi; + for (k = top + 1; k <= i__6; ++k) { + i__4 = k + (j + i__) * a_dim1; + temp.r = a[i__4].r, temp.i = a[i__4].i; + i__4 = k + (j + i__ + 1) * a_dim1; + temp1.r = a[i__4].r, temp1.i = a[i__4].i; + i__4 = k + (j + i__ + 2) * a_dim1; + temp2.r = a[i__4].r, temp2.i = a[i__4].i; + i__4 = k + (j + i__ + 3) * a_dim1; + temp3.r = a[i__4].r, temp3.i = a[i__4].i; + i__4 = k + (j + i__ + 3) * a_dim1; + z__2.r = c2.r * temp3.r - c2.i * temp3.i, z__2.i = + c2.r * temp3.i + c2.i * temp3.r; + d_cnjg(&z__4, &s2); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, z__3.i = + z__4.r * temp2.i + z__4.i * temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + a[i__4].r = z__1.r, a[i__4].i = z__1.i; + z__3.r = -s2.r, z__3.i = -s2.i; + z__2.r = z__3.r * temp3.r - z__3.i * temp3.i, z__2.i = + z__3.r * temp3.i + z__3.i * temp3.r; + z__4.r = c2.r * temp2.r - c2.i * temp2.i, z__4.i = + c2.r * temp2.i + c2.i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + temp2.r = z__1.r, temp2.i = z__1.i; + i__4 = k + (j + i__ + 2) * a_dim1; + z__2.r = c1.r * temp2.r - c1.i * temp2.i, z__2.i = + c1.r * temp2.i + c1.i * temp2.r; + d_cnjg(&z__4, &s1); + z__3.r = z__4.r * temp1.r - z__4.i * temp1.i, z__3.i = + z__4.r * temp1.i + z__4.i * temp1.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + a[i__4].r = z__1.r, a[i__4].i = z__1.i; + z__3.r = -s1.r, z__3.i = -s1.i; + z__2.r = z__3.r * temp2.r - z__3.i * temp2.i, z__2.i = + z__3.r * temp2.i + z__3.i * temp2.r; + z__4.r = c1.r * temp1.r - c1.i * temp1.i, z__4.i = + c1.r * temp1.i + c1.i * temp1.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + temp1.r = z__1.r, temp1.i = z__1.i; + i__4 = k + (j + i__ + 1) * a_dim1; + z__2.r = ctemp.r * temp1.r - ctemp.i * temp1.i, + z__2.i = ctemp.r * temp1.i + ctemp.i * + temp1.r; + d_cnjg(&z__4, &s); + z__3.r = z__4.r * temp.r - z__4.i * temp.i, z__3.i = + z__4.r * temp.i + z__4.i * temp.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + a[i__4].r = z__1.r, a[i__4].i = z__1.i; + i__4 = k + (j + i__) * a_dim1; + z__3.r = -s.r, z__3.i = -s.i; + z__2.r = z__3.r * temp1.r - z__3.i * temp1.i, z__2.i = + z__3.r * temp1.i + z__3.i * temp1.r; + z__4.r = ctemp.r * temp.r - ctemp.i * temp.i, z__4.i = + ctemp.r * temp.i + ctemp.i * temp.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + a[i__4].r = z__1.r, a[i__4].i = z__1.i; + } + } + + if (jj > 0) { + for (i__ = jj; i__ >= 1; --i__) { + i__5 = j + 1 + i__ + j * a_dim1; + c__ = a[i__5].r; + i__5 = *ihi - top; + d_cnjg(&z__2, &b[j + 1 + i__ + j * b_dim1]); + z__1.r = -z__2.r, z__1.i = -z__2.i; + zrot_(&i__5, &a[top + 1 + (j + i__ + 1) * a_dim1], & + c__1, &a[top + 1 + (j + i__) * a_dim1], &c__1, + &c__, &z__1); + } + } + +/* Update (J+1)th column of A by transformations from left. */ + + if (j < jcol + nnb - 1) { + len = j + 1 - jcol; + +/* Multiply with the trailing accumulated unitary */ +/* matrix, which takes the form */ + +/* [ U11 U12 ] */ +/* U = [ ], */ +/* [ U21 U22 ] */ + +/* where U21 is a LEN-by-LEN matrix and U12 is lower */ +/* triangular. */ + + jrow = *ihi - nblst + 1; + zgemv_("Conjugate", &nblst, &len, &c_b1, &work[1], &nblst, + &a[jrow + (j + 1) * a_dim1], &c__1, &c_b2, &work[ + pw], &c__1); + ppw = pw + len; + i__5 = jrow + nblst - len - 1; + for (i__ = jrow; i__ <= i__5; ++i__) { + i__6 = ppw; + i__4 = i__ + (j + 1) * a_dim1; + work[i__6].r = a[i__4].r, work[i__6].i = a[i__4].i; + ++ppw; + } + i__5 = nblst - len; + ztrmv_("Lower", "Conjugate", "Non-unit", &i__5, &work[len + * nblst + 1], &nblst, &work[pw + len], &c__1); + i__5 = nblst - len; + zgemv_("Conjugate", &len, &i__5, &c_b1, &work[(len + 1) * + nblst - len + 1], &nblst, &a[jrow + nblst - len + + (j + 1) * a_dim1], &c__1, &c_b1, &work[pw + len], + &c__1); + ppw = pw; + i__5 = jrow + nblst - 1; + for (i__ = jrow; i__ <= i__5; ++i__) { + i__6 = i__ + (j + 1) * a_dim1; + i__4 = ppw; + a[i__6].r = work[i__4].r, a[i__6].i = work[i__4].i; + ++ppw; + } + +/* Multiply with the other accumulated unitary */ +/* matrices, which take the form */ + +/* [ U11 U12 0 ] */ +/* [ ] */ +/* U = [ U21 U22 0 ], */ +/* [ ] */ +/* [ 0 0 I ] */ + +/* where I denotes the (NNB-LEN)-by-(NNB-LEN) identity */ +/* matrix, U21 is a LEN-by-LEN upper triangular matrix */ +/* and U12 is an NNB-by-NNB lower triangular matrix. */ + + ppwo = nblst * nblst + 1; + j0 = jrow - nnb; + i__5 = jcol + 1; + i__6 = -nnb; + for (jrow = j0; i__6 < 0 ? jrow >= i__5 : jrow <= i__5; + jrow += i__6) { + ppw = pw + len; + i__4 = jrow + nnb - 1; + for (i__ = jrow; i__ <= i__4; ++i__) { + i__7 = ppw; + i__8 = i__ + (j + 1) * a_dim1; + work[i__7].r = a[i__8].r, work[i__7].i = a[i__8] + .i; + ++ppw; + } + ppw = pw; + i__4 = jrow + nnb + len - 1; + for (i__ = jrow + nnb; i__ <= i__4; ++i__) { + i__7 = ppw; + i__8 = i__ + (j + 1) * a_dim1; + work[i__7].r = a[i__8].r, work[i__7].i = a[i__8] + .i; + ++ppw; + } + i__4 = nnb << 1; + ztrmv_("Upper", "Conjugate", "Non-unit", &len, &work[ + ppwo + nnb], &i__4, &work[pw], &c__1); + i__4 = nnb << 1; + ztrmv_("Lower", "Conjugate", "Non-unit", &nnb, &work[ + ppwo + (len << 1) * nnb], &i__4, &work[pw + + len], &c__1); + i__4 = nnb << 1; + zgemv_("Conjugate", &nnb, &len, &c_b1, &work[ppwo], & + i__4, &a[jrow + (j + 1) * a_dim1], &c__1, & + c_b1, &work[pw], &c__1); + i__4 = nnb << 1; + zgemv_("Conjugate", &len, &nnb, &c_b1, &work[ppwo + ( + len << 1) * nnb + nnb], &i__4, &a[jrow + nnb + + (j + 1) * a_dim1], &c__1, &c_b1, &work[pw + + len], &c__1); + ppw = pw; + i__4 = jrow + len + nnb - 1; + for (i__ = jrow; i__ <= i__4; ++i__) { + i__7 = i__ + (j + 1) * a_dim1; + i__8 = ppw; + a[i__7].r = work[i__8].r, a[i__7].i = work[i__8] + .i; + ++ppw; + } + ppwo += (nnb << 2) * nnb; + } + } + } + +/* Apply accumulated unitary matrices to A. */ + + cola = *n - jcol - nnb + 1; + j = *ihi - nblst + 1; + zgemm_("Conjugate", "No Transpose", &nblst, &cola, &nblst, &c_b1, + &work[1], &nblst, &a[j + (jcol + nnb) * a_dim1], lda, & + c_b2, &work[pw], &nblst); + zlacpy_("All", &nblst, &cola, &work[pw], &nblst, &a[j + (jcol + + nnb) * a_dim1], lda); + ppwo = nblst * nblst + 1; + j0 = j - nnb; + i__3 = jcol + 1; + i__6 = -nnb; + for (j = j0; i__6 < 0 ? j >= i__3 : j <= i__3; j += i__6) { + if (blk22) { + +/* Exploit the structure of */ + +/* [ U11 U12 ] */ +/* U = [ ] */ +/* [ U21 U22 ], */ + +/* where all blocks are NNB-by-NNB, U21 is upper */ +/* triangular and U12 is lower triangular. */ + + i__5 = nnb << 1; + i__4 = nnb << 1; + i__7 = *lwork - pw + 1; + zunm22_("Left", "Conjugate", &i__5, &cola, &nnb, &nnb, & + work[ppwo], &i__4, &a[j + (jcol + nnb) * a_dim1], + lda, &work[pw], &i__7, &ierr); + } else { + +/* Ignore the structure of U. */ + + i__5 = nnb << 1; + i__4 = nnb << 1; + i__7 = nnb << 1; + i__8 = nnb << 1; + zgemm_("Conjugate", "No Transpose", &i__5, &cola, &i__4, & + c_b1, &work[ppwo], &i__7, &a[j + (jcol + nnb) * + a_dim1], lda, &c_b2, &work[pw], &i__8); + i__5 = nnb << 1; + i__4 = nnb << 1; + zlacpy_("All", &i__5, &cola, &work[pw], &i__4, &a[j + ( + jcol + nnb) * a_dim1], lda); + } + ppwo += (nnb << 2) * nnb; + } + +/* Apply accumulated unitary matrices to Q. */ + + if (wantq) { + j = *ihi - nblst + 1; + if (initq) { +/* Computing MAX */ + i__6 = 2, i__3 = j - jcol + 1; + topq = f2cmax(i__6,i__3); + nh = *ihi - topq + 1; + } else { + topq = 1; + nh = *n; + } + zgemm_("No Transpose", "No Transpose", &nh, &nblst, &nblst, & + c_b1, &q[topq + j * q_dim1], ldq, &work[1], &nblst, & + c_b2, &work[pw], &nh); + zlacpy_("All", &nh, &nblst, &work[pw], &nh, &q[topq + j * + q_dim1], ldq); + ppwo = nblst * nblst + 1; + j0 = j - nnb; + i__6 = jcol + 1; + i__3 = -nnb; + for (j = j0; i__3 < 0 ? j >= i__6 : j <= i__6; j += i__3) { + if (initq) { +/* Computing MAX */ + i__5 = 2, i__4 = j - jcol + 1; + topq = f2cmax(i__5,i__4); + nh = *ihi - topq + 1; + } + if (blk22) { + +/* Exploit the structure of U. */ + + i__5 = nnb << 1; + i__4 = nnb << 1; + i__7 = *lwork - pw + 1; + zunm22_("Right", "No Transpose", &nh, &i__5, &nnb, & + nnb, &work[ppwo], &i__4, &q[topq + j * q_dim1] + , ldq, &work[pw], &i__7, &ierr); + } else { + +/* Ignore the structure of U. */ + + i__5 = nnb << 1; + i__4 = nnb << 1; + i__7 = nnb << 1; + zgemm_("No Transpose", "No Transpose", &nh, &i__5, & + i__4, &c_b1, &q[topq + j * q_dim1], ldq, & + work[ppwo], &i__7, &c_b2, &work[pw], &nh); + i__5 = nnb << 1; + zlacpy_("All", &nh, &i__5, &work[pw], &nh, &q[topq + + j * q_dim1], ldq); + } + ppwo += (nnb << 2) * nnb; + } + } + +/* Accumulate right Givens rotations if required. */ + + if (wantz || top > 0) { + +/* Initialize small unitary factors that will hold the */ +/* accumulated Givens rotations in workspace. */ + + zlaset_("All", &nblst, &nblst, &c_b2, &c_b1, &work[1], &nblst); + pw = nblst * nblst + 1; + i__3 = n2nb; + for (i__ = 1; i__ <= i__3; ++i__) { + i__6 = nnb << 1; + i__5 = nnb << 1; + i__4 = nnb << 1; + zlaset_("All", &i__6, &i__5, &c_b2, &c_b1, &work[pw], & + i__4); + pw += (nnb << 2) * nnb; + } + +/* Accumulate Givens rotations into workspace array. */ + + i__3 = jcol + nnb - 1; + for (j = jcol; j <= i__3; ++j) { + ppw = (nblst + 1) * (nblst - 2) - j + jcol + 1; + len = j + 2 - jcol; + jrow = j + n2nb * nnb + 2; + i__6 = jrow; + for (i__ = *ihi; i__ >= i__6; --i__) { + i__5 = i__ + j * a_dim1; + ctemp.r = a[i__5].r, ctemp.i = a[i__5].i; + i__5 = i__ + j * a_dim1; + a[i__5].r = 0., a[i__5].i = 0.; + i__5 = i__ + j * b_dim1; + s.r = b[i__5].r, s.i = b[i__5].i; + i__5 = i__ + j * b_dim1; + b[i__5].r = 0., b[i__5].i = 0.; + i__5 = ppw + len - 1; + for (jj = ppw; jj <= i__5; ++jj) { + i__4 = jj + nblst; + temp.r = work[i__4].r, temp.i = work[i__4].i; + i__4 = jj + nblst; + z__2.r = ctemp.r * temp.r - ctemp.i * temp.i, + z__2.i = ctemp.r * temp.i + ctemp.i * + temp.r; + d_cnjg(&z__4, &s); + i__7 = jj; + z__3.r = z__4.r * work[i__7].r - z__4.i * work[ + i__7].i, z__3.i = z__4.r * work[i__7].i + + z__4.i * work[i__7].r; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + work[i__4].r = z__1.r, work[i__4].i = z__1.i; + i__4 = jj; + z__2.r = s.r * temp.r - s.i * temp.i, z__2.i = + s.r * temp.i + s.i * temp.r; + i__7 = jj; + z__3.r = ctemp.r * work[i__7].r - ctemp.i * work[ + i__7].i, z__3.i = ctemp.r * work[i__7].i + + ctemp.i * work[i__7].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + work[i__4].r = z__1.r, work[i__4].i = z__1.i; + } + ++len; + ppw = ppw - nblst - 1; + } + + ppwo = nblst * nblst + (nnb + j - jcol - 1 << 1) * nnb + + nnb; + j0 = jrow - nnb; + i__6 = j + 2; + i__5 = -nnb; + for (jrow = j0; i__5 < 0 ? jrow >= i__6 : jrow <= i__6; + jrow += i__5) { + ppw = ppwo; + len = j + 2 - jcol; + i__4 = jrow; + for (i__ = jrow + nnb - 1; i__ >= i__4; --i__) { + i__7 = i__ + j * a_dim1; + ctemp.r = a[i__7].r, ctemp.i = a[i__7].i; + i__7 = i__ + j * a_dim1; + a[i__7].r = 0., a[i__7].i = 0.; + i__7 = i__ + j * b_dim1; + s.r = b[i__7].r, s.i = b[i__7].i; + i__7 = i__ + j * b_dim1; + b[i__7].r = 0., b[i__7].i = 0.; + i__7 = ppw + len - 1; + for (jj = ppw; jj <= i__7; ++jj) { + i__8 = jj + (nnb << 1); + temp.r = work[i__8].r, temp.i = work[i__8].i; + i__8 = jj + (nnb << 1); + z__2.r = ctemp.r * temp.r - ctemp.i * temp.i, + z__2.i = ctemp.r * temp.i + ctemp.i * + temp.r; + d_cnjg(&z__4, &s); + i__9 = jj; + z__3.r = z__4.r * work[i__9].r - z__4.i * + work[i__9].i, z__3.i = z__4.r * work[ + i__9].i + z__4.i * work[i__9].r; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + work[i__8].r = z__1.r, work[i__8].i = z__1.i; + i__8 = jj; + z__2.r = s.r * temp.r - s.i * temp.i, z__2.i = + s.r * temp.i + s.i * temp.r; + i__9 = jj; + z__3.r = ctemp.r * work[i__9].r - ctemp.i * + work[i__9].i, z__3.i = ctemp.r * work[ + i__9].i + ctemp.i * work[i__9].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + work[i__8].r = z__1.r, work[i__8].i = z__1.i; + } + ++len; + ppw = ppw - (nnb << 1) - 1; + } + ppwo += (nnb << 2) * nnb; + } + } + } else { + + i__3 = *ihi - jcol - 1; + zlaset_("Lower", &i__3, &nnb, &c_b2, &c_b2, &a[jcol + 2 + + jcol * a_dim1], lda); + i__3 = *ihi - jcol - 1; + zlaset_("Lower", &i__3, &nnb, &c_b2, &c_b2, &b[jcol + 2 + + jcol * b_dim1], ldb); + } + +/* Apply accumulated unitary matrices to A and B. */ + + if (top > 0) { + j = *ihi - nblst + 1; + zgemm_("No Transpose", "No Transpose", &top, &nblst, &nblst, & + c_b1, &a[j * a_dim1 + 1], lda, &work[1], &nblst, & + c_b2, &work[pw], &top); + zlacpy_("All", &top, &nblst, &work[pw], &top, &a[j * a_dim1 + + 1], lda); + ppwo = nblst * nblst + 1; + j0 = j - nnb; + i__3 = jcol + 1; + i__5 = -nnb; + for (j = j0; i__5 < 0 ? j >= i__3 : j <= i__3; j += i__5) { + if (blk22) { + +/* Exploit the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = *lwork - pw + 1; + zunm22_("Right", "No Transpose", &top, &i__6, &nnb, & + nnb, &work[ppwo], &i__4, &a[j * a_dim1 + 1], + lda, &work[pw], &i__7, &ierr); + } else { + +/* Ignore the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = nnb << 1; + zgemm_("No Transpose", "No Transpose", &top, &i__6, & + i__4, &c_b1, &a[j * a_dim1 + 1], lda, &work[ + ppwo], &i__7, &c_b2, &work[pw], &top); + i__6 = nnb << 1; + zlacpy_("All", &top, &i__6, &work[pw], &top, &a[j * + a_dim1 + 1], lda); + } + ppwo += (nnb << 2) * nnb; + } + + j = *ihi - nblst + 1; + zgemm_("No Transpose", "No Transpose", &top, &nblst, &nblst, & + c_b1, &b[j * b_dim1 + 1], ldb, &work[1], &nblst, & + c_b2, &work[pw], &top); + zlacpy_("All", &top, &nblst, &work[pw], &top, &b[j * b_dim1 + + 1], ldb); + ppwo = nblst * nblst + 1; + j0 = j - nnb; + i__5 = jcol + 1; + i__3 = -nnb; + for (j = j0; i__3 < 0 ? j >= i__5 : j <= i__5; j += i__3) { + if (blk22) { + +/* Exploit the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = *lwork - pw + 1; + zunm22_("Right", "No Transpose", &top, &i__6, &nnb, & + nnb, &work[ppwo], &i__4, &b[j * b_dim1 + 1], + ldb, &work[pw], &i__7, &ierr); + } else { + +/* Ignore the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = nnb << 1; + zgemm_("No Transpose", "No Transpose", &top, &i__6, & + i__4, &c_b1, &b[j * b_dim1 + 1], ldb, &work[ + ppwo], &i__7, &c_b2, &work[pw], &top); + i__6 = nnb << 1; + zlacpy_("All", &top, &i__6, &work[pw], &top, &b[j * + b_dim1 + 1], ldb); + } + ppwo += (nnb << 2) * nnb; + } + } + +/* Apply accumulated unitary matrices to Z. */ + + if (wantz) { + j = *ihi - nblst + 1; + if (initq) { +/* Computing MAX */ + i__3 = 2, i__5 = j - jcol + 1; + topq = f2cmax(i__3,i__5); + nh = *ihi - topq + 1; + } else { + topq = 1; + nh = *n; + } + zgemm_("No Transpose", "No Transpose", &nh, &nblst, &nblst, & + c_b1, &z__[topq + j * z_dim1], ldz, &work[1], &nblst, + &c_b2, &work[pw], &nh); + zlacpy_("All", &nh, &nblst, &work[pw], &nh, &z__[topq + j * + z_dim1], ldz); + ppwo = nblst * nblst + 1; + j0 = j - nnb; + i__3 = jcol + 1; + i__5 = -nnb; + for (j = j0; i__5 < 0 ? j >= i__3 : j <= i__3; j += i__5) { + if (initq) { +/* Computing MAX */ + i__6 = 2, i__4 = j - jcol + 1; + topq = f2cmax(i__6,i__4); + nh = *ihi - topq + 1; + } + if (blk22) { + +/* Exploit the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = *lwork - pw + 1; + zunm22_("Right", "No Transpose", &nh, &i__6, &nnb, & + nnb, &work[ppwo], &i__4, &z__[topq + j * + z_dim1], ldz, &work[pw], &i__7, &ierr); + } else { + +/* Ignore the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = nnb << 1; + zgemm_("No Transpose", "No Transpose", &nh, &i__6, & + i__4, &c_b1, &z__[topq + j * z_dim1], ldz, & + work[ppwo], &i__7, &c_b2, &work[pw], &nh); + i__6 = nnb << 1; + zlacpy_("All", &nh, &i__6, &work[pw], &nh, &z__[topq + + j * z_dim1], ldz); + } + ppwo += (nnb << 2) * nnb; + } + } + } + } + +/* Use unblocked code to reduce the rest of the matrix */ +/* Avoid re-initialization of modified Q and Z. */ + + *(unsigned char *)compq2 = *(unsigned char *)compq; + *(unsigned char *)compz2 = *(unsigned char *)compz; + if (jcol != *ilo) { + if (wantq) { + *(unsigned char *)compq2 = 'V'; + } + if (wantz) { + *(unsigned char *)compz2 = 'V'; + } + } + + if (jcol < *ihi) { + zgghrd_(compq2, compz2, n, &jcol, ihi, &a[a_offset], lda, &b[b_offset] + , ldb, &q[q_offset], ldq, &z__[z_offset], ldz, &ierr); + } + z__1.r = (doublereal) lwkopt, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + + return 0; + +/* End of ZGGHD3 */ + +} /* zgghd3_ */ + diff --git a/lapack-netlib/SRC/zgghrd.c b/lapack-netlib/SRC/zgghrd.c new file mode 100644 index 000000000..0b800c764 --- /dev/null +++ b/lapack-netlib/SRC/zgghrd.c @@ -0,0 +1,788 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGGHRD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGGHRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, */ +/* LDQ, Z, LDZ, INFO ) */ + +/* CHARACTER COMPQ, COMPZ */ +/* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */ +/* $ Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGGHRD reduces a pair of complex matrices (A,B) to generalized upper */ +/* > Hessenberg form using unitary transformations, where A is a */ +/* > general matrix and B is upper triangular. The form of the */ +/* > generalized eigenvalue problem is */ +/* > A*x = lambda*B*x, */ +/* > and B is typically made upper triangular by computing its QR */ +/* > factorization and moving the unitary matrix Q to the left side */ +/* > of the equation. */ +/* > */ +/* > This subroutine simultaneously reduces A to a Hessenberg matrix H: */ +/* > Q**H*A*Z = H */ +/* > and transforms B to another upper triangular matrix T: */ +/* > Q**H*B*Z = T */ +/* > in order to reduce the problem to its standard form */ +/* > H*y = lambda*T*y */ +/* > where y = Z**H*x. */ +/* > */ +/* > The unitary matrices Q and Z are determined as products of Givens */ +/* > rotations. They may either be formed explicitly, or they may be */ +/* > postmultiplied into input matrices Q1 and Z1, so that */ +/* > Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H */ +/* > Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H */ +/* > If Q1 is the unitary matrix from the QR factorization of B in the */ +/* > original equation A*x = lambda*B*x, then ZGGHRD reduces the original */ +/* > problem to generalized Hessenberg form. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] COMPQ */ +/* > \verbatim */ +/* > COMPQ is CHARACTER*1 */ +/* > = 'N': do not compute Q; */ +/* > = 'I': Q is initialized to the unit matrix, and the */ +/* > unitary matrix Q is returned; */ +/* > = 'V': Q must contain a unitary matrix Q1 on entry, */ +/* > and the product Q1*Q is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] COMPZ */ +/* > \verbatim */ +/* > COMPZ is CHARACTER*1 */ +/* > = 'N': do not compute Z; */ +/* > = 'I': Z is initialized to the unit matrix, and the */ +/* > unitary matrix Z is returned; */ +/* > = 'V': Z must contain a unitary matrix Z1 on entry, */ +/* > and the product Z1*Z is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > */ +/* > ILO and IHI mark the rows and columns of A which are to be */ +/* > reduced. It is assumed that A is already upper triangular */ +/* > in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are */ +/* > normally set by a previous call to ZGGBAL; otherwise they */ +/* > should be set to 1 and N respectively. */ +/* > 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the N-by-N general matrix to be reduced. */ +/* > On exit, the upper triangle and the first subdiagonal of A */ +/* > are overwritten with the upper Hessenberg matrix H, and the */ +/* > rest is set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB, N) */ +/* > On entry, the N-by-N upper triangular matrix B. */ +/* > On exit, the upper triangular matrix T = Q**H B Z. The */ +/* > elements below the diagonal are set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX*16 array, dimension (LDQ, N) */ +/* > On entry, if COMPQ = 'V', the unitary matrix Q1, typically */ +/* > from the QR factorization of B. */ +/* > On exit, if COMPQ='I', the unitary matrix Q, and if */ +/* > COMPQ = 'V', the product Q1*Q. */ +/* > Not referenced if COMPQ='N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. */ +/* > LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, N) */ +/* > On entry, if COMPZ = 'V', the unitary matrix Z1. */ +/* > On exit, if COMPZ='I', the unitary matrix Z, and if */ +/* > COMPZ = 'V', the product Z1*Z. */ +/* > Not referenced if COMPZ='N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. */ +/* > LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16OTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > This routine reduces A to Hessenberg and B to triangular form by */ +/* > an unblocked reduction, as described in _Matrix_Computations_, */ +/* > by Golub and van Loan (Johns Hopkins Press). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zgghrd_(char *compq, char *compz, integer *n, integer * + ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, doublecomplex *q, integer *ldq, doublecomplex *z__, + integer *ldz, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, + z_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + integer jcol, jrow; + extern /* Subroutine */ int zrot_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *); + doublereal c__; + doublecomplex s; + extern logical lsame_(char *, char *); + doublecomplex ctemp; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer icompq, icompz; + extern /* Subroutine */ int zlaset_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *), zlartg_(doublecomplex *, doublecomplex *, doublereal *, + doublecomplex *, doublecomplex *); + logical ilq, ilz; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Decode COMPQ */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + + /* Function Body */ + if (lsame_(compq, "N")) { + ilq = FALSE_; + icompq = 1; + } else if (lsame_(compq, "V")) { + ilq = TRUE_; + icompq = 2; + } else if (lsame_(compq, "I")) { + ilq = TRUE_; + icompq = 3; + } else { + icompq = 0; + } + +/* Decode COMPZ */ + + if (lsame_(compz, "N")) { + ilz = FALSE_; + icompz = 1; + } else if (lsame_(compz, "V")) { + ilz = TRUE_; + icompz = 2; + } else if (lsame_(compz, "I")) { + ilz = TRUE_; + icompz = 3; + } else { + icompz = 0; + } + +/* Test the input parameters. */ + + *info = 0; + if (icompq <= 0) { + *info = -1; + } else if (icompz <= 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1) { + *info = -4; + } else if (*ihi > *n || *ihi < *ilo - 1) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (ilq && *ldq < *n || *ldq < 1) { + *info = -11; + } else if (ilz && *ldz < *n || *ldz < 1) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGGHRD", &i__1, (ftnlen)6); + return 0; + } + +/* Initialize Q and Z if desired. */ + + if (icompq == 3) { + zlaset_("Full", n, n, &c_b2, &c_b1, &q[q_offset], ldq); + } + if (icompz == 3) { + zlaset_("Full", n, n, &c_b2, &c_b1, &z__[z_offset], ldz); + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + +/* Zero out lower triangle of B */ + + i__1 = *n - 1; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = jcol + 1; jrow <= i__2; ++jrow) { + i__3 = jrow + jcol * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + +/* Reduce A and B */ + + i__1 = *ihi - 2; + for (jcol = *ilo; jcol <= i__1; ++jcol) { + + i__2 = jcol + 2; + for (jrow = *ihi; jrow >= i__2; --jrow) { + +/* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) */ + + i__3 = jrow - 1 + jcol * a_dim1; + ctemp.r = a[i__3].r, ctemp.i = a[i__3].i; + zlartg_(&ctemp, &a[jrow + jcol * a_dim1], &c__, &s, &a[jrow - 1 + + jcol * a_dim1]); + i__3 = jrow + jcol * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; + i__3 = *n - jcol; + zrot_(&i__3, &a[jrow - 1 + (jcol + 1) * a_dim1], lda, &a[jrow + ( + jcol + 1) * a_dim1], lda, &c__, &s); + i__3 = *n + 2 - jrow; + zrot_(&i__3, &b[jrow - 1 + (jrow - 1) * b_dim1], ldb, &b[jrow + ( + jrow - 1) * b_dim1], ldb, &c__, &s); + if (ilq) { + d_cnjg(&z__1, &s); + zrot_(n, &q[(jrow - 1) * q_dim1 + 1], &c__1, &q[jrow * q_dim1 + + 1], &c__1, &c__, &z__1); + } + +/* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) */ + + i__3 = jrow + jrow * b_dim1; + ctemp.r = b[i__3].r, ctemp.i = b[i__3].i; + zlartg_(&ctemp, &b[jrow + (jrow - 1) * b_dim1], &c__, &s, &b[jrow + + jrow * b_dim1]); + i__3 = jrow + (jrow - 1) * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; + zrot_(ihi, &a[jrow * a_dim1 + 1], &c__1, &a[(jrow - 1) * a_dim1 + + 1], &c__1, &c__, &s); + i__3 = jrow - 1; + zrot_(&i__3, &b[jrow * b_dim1 + 1], &c__1, &b[(jrow - 1) * b_dim1 + + 1], &c__1, &c__, &s); + if (ilz) { + zrot_(n, &z__[jrow * z_dim1 + 1], &c__1, &z__[(jrow - 1) * + z_dim1 + 1], &c__1, &c__, &s); + } +/* L30: */ + } +/* L40: */ + } + + return 0; + +/* End of ZGGHRD */ + +} /* zgghrd_ */ + diff --git a/lapack-netlib/SRC/zgglse.c b/lapack-netlib/SRC/zgglse.c new file mode 100644 index 000000000..3eab80aaa --- /dev/null +++ b/lapack-netlib/SRC/zgglse.c @@ -0,0 +1,798 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGGLSE solves overdetermined or underdetermined systems for OTHER matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGGLSE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, P */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), C( * ), D( * ), */ +/* $ WORK( * ), X( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGGLSE solves the linear equality-constrained least squares (LSE) */ +/* > problem: */ +/* > */ +/* > minimize || c - A*x ||_2 subject to B*x = d */ +/* > */ +/* > where A is an M-by-N matrix, B is a P-by-N matrix, c is a given */ +/* > M-vector, and d is a given P-vector. It is assumed that */ +/* > P <= N <= M+P, and */ +/* > */ +/* > rank(B) = P and rank( (A) ) = N. */ +/* > ( (B) ) */ +/* > */ +/* > These conditions ensure that the LSE problem has a unique solution, */ +/* > which is obtained using a generalized RQ factorization of the */ +/* > matrices (B, A) given by */ +/* > */ +/* > B = (0 R)*Q, A = Z*T*Q. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of rows of the matrix B. 0 <= P <= N <= M+P. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(M,N)-by-N upper trapezoidal matrix T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,N) */ +/* > On entry, the P-by-N matrix B. */ +/* > On exit, the upper triangle of the subarray B(1:P,N-P+1:N) */ +/* > contains the P-by-P upper triangular matrix R. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,P). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX*16 array, dimension (M) */ +/* > On entry, C contains the right hand side vector for the */ +/* > least squares part of the LSE problem. */ +/* > On exit, the residual sum of squares for the solution */ +/* > is given by the sum of squares of elements N-P+1 to M of */ +/* > vector C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is COMPLEX*16 array, dimension (P) */ +/* > On entry, D contains the right hand side vector for the */ +/* > constrained equation. */ +/* > On exit, D is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension (N) */ +/* > On exit, X is the solution of the LSE problem. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,M+N+P). */ +/* > For optimum performance LWORK >= P+f2cmin(M,N)+f2cmax(M,N)*NB, */ +/* > where NB is an upper bound for the optimal blocksizes for */ +/* > ZGEQRF, CGERQF, ZUNMQR and CUNMRQ. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > = 1: the upper triangular factor R associated with B in the */ +/* > generalized RQ factorization of the pair (B, A) is */ +/* > singular, so that rank(B) < P; the least squares */ +/* > solution could not be computed. */ +/* > = 2: the (N-P) by (N-P) part of the upper trapezoidal factor */ +/* > T associated with A in the generalized RQ factorization */ +/* > of the pair (B, A) is singular, so that */ +/* > rank( (A) ) < N; the least squares solution could not */ +/* > ( (B) ) */ +/* > be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16OTHERsolve */ + +/* ===================================================================== */ +/* Subroutine */ int zgglse_(integer *m, integer *n, integer *p, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublecomplex *c__, doublecomplex *d__, doublecomplex *x, + doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1; + + /* Local variables */ + integer lopt; + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), + zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, + integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *), ztrmv_(char *, char *, + char *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *); + integer nb, mn, nr; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zggrqf_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer *) + ; + integer lwkmin, nb1, nb2, nb3, nb4, lwkopt; + logical lquery; + extern /* Subroutine */ int zunmqr_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmrq_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *), ztrtrs_(char *, char *, char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --c__; + --d__; + --x; + --work; + + /* Function Body */ + *info = 0; + mn = f2cmin(*m,*n); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*p < 0 || *p > *n || *p < *n - *m) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldb < f2cmax(1,*p)) { + *info = -7; + } + +/* Calculate workspace */ + + if (*info == 0) { + if (*n == 0) { + lwkmin = 1; + lwkopt = 1; + } else { + nb1 = ilaenv_(&c__1, "ZGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nb2 = ilaenv_(&c__1, "ZGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nb3 = ilaenv_(&c__1, "ZUNMQR", " ", m, n, p, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb4 = ilaenv_(&c__1, "ZUNMRQ", " ", m, n, p, &c_n1, (ftnlen)6, ( + ftnlen)1); +/* Computing MAX */ + i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3); + nb = f2cmax(i__1,nb4); + lwkmin = *m + *n + *p; + lwkopt = *p + mn + f2cmax(*m,*n) * nb; + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + if (*lwork < lwkmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGGLSE", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Compute the GRQ factorization of matrices B and A: */ + +/* B*Q**H = ( 0 T12 ) P Z**H*A*Q**H = ( R11 R12 ) N-P */ +/* N-P P ( 0 R22 ) M+P-N */ +/* N-P P */ + +/* where T12 and R11 are upper triangular, and Q and Z are */ +/* unitary. */ + + i__1 = *lwork - *p - mn; + zggrqf_(p, m, n, &b[b_offset], ldb, &work[1], &a[a_offset], lda, &work[*p + + 1], &work[*p + mn + 1], &i__1, info); + i__1 = *p + mn + 1; + lopt = (integer) work[i__1].r; + +/* Update c = Z**H *c = ( c1 ) N-P */ +/* ( c2 ) M+P-N */ + + i__1 = f2cmax(1,*m); + i__2 = *lwork - *p - mn; + zunmqr_("Left", "Conjugate Transpose", m, &c__1, &mn, &a[a_offset], lda, & + work[*p + 1], &c__[1], &i__1, &work[*p + mn + 1], &i__2, info); +/* Computing MAX */ + i__3 = *p + mn + 1; + i__1 = lopt, i__2 = (integer) work[i__3].r; + lopt = f2cmax(i__1,i__2); + +/* Solve T12*x2 = d for x2 */ + + if (*p > 0) { + ztrtrs_("Upper", "No transpose", "Non-unit", p, &c__1, &b[(*n - *p + + 1) * b_dim1 + 1], ldb, &d__[1], p, info); + + if (*info > 0) { + *info = 1; + return 0; + } + +/* Put the solution in X */ + + zcopy_(p, &d__[1], &c__1, &x[*n - *p + 1], &c__1); + +/* Update c1 */ + + i__1 = *n - *p; + z__1.r = -1., z__1.i = 0.; + zgemv_("No transpose", &i__1, p, &z__1, &a[(*n - *p + 1) * a_dim1 + 1] + , lda, &d__[1], &c__1, &c_b1, &c__[1], &c__1); + } + +/* Solve R11*x1 = c1 for x1 */ + + if (*n > *p) { + i__1 = *n - *p; + i__2 = *n - *p; + ztrtrs_("Upper", "No transpose", "Non-unit", &i__1, &c__1, &a[ + a_offset], lda, &c__[1], &i__2, info); + + if (*info > 0) { + *info = 2; + return 0; + } + +/* Put the solutions in X */ + + i__1 = *n - *p; + zcopy_(&i__1, &c__[1], &c__1, &x[1], &c__1); + } + +/* Compute the residual vector: */ + + if (*m < *n) { + nr = *m + *p - *n; + if (nr > 0) { + i__1 = *n - *m; + z__1.r = -1., z__1.i = 0.; + zgemv_("No transpose", &nr, &i__1, &z__1, &a[*n - *p + 1 + (*m + + 1) * a_dim1], lda, &d__[nr + 1], &c__1, &c_b1, &c__[*n - * + p + 1], &c__1); + } + } else { + nr = *p; + } + if (nr > 0) { + ztrmv_("Upper", "No transpose", "Non unit", &nr, &a[*n - *p + 1 + (*n + - *p + 1) * a_dim1], lda, &d__[1], &c__1); + z__1.r = -1., z__1.i = 0.; + zaxpy_(&nr, &z__1, &d__[1], &c__1, &c__[*n - *p + 1], &c__1); + } + +/* Backward transformation x = Q**H*x */ + + i__1 = *lwork - *p - mn; + zunmrq_("Left", "Conjugate Transpose", n, &c__1, p, &b[b_offset], ldb, & + work[1], &x[1], n, &work[*p + mn + 1], &i__1, info); +/* Computing MAX */ + i__4 = *p + mn + 1; + i__2 = lopt, i__3 = (integer) work[i__4].r; + i__1 = *p + mn + f2cmax(i__2,i__3); + work[1].r = (doublereal) i__1, work[1].i = 0.; + + return 0; + +/* End of ZGGLSE */ + +} /* zgglse_ */ + diff --git a/lapack-netlib/SRC/zggqrf.c b/lapack-netlib/SRC/zggqrf.c new file mode 100644 index 000000000..16ac41fad --- /dev/null +++ b/lapack-netlib/SRC/zggqrf.c @@ -0,0 +1,720 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGGQRF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGGQRF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, */ +/* LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, P */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), */ +/* $ WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGGQRF computes a generalized QR factorization of an N-by-M matrix A */ +/* > and an N-by-P matrix B: */ +/* > */ +/* > A = Q*R, B = Q*T*Z, */ +/* > */ +/* > where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix, */ +/* > and R and T assume one of the forms: */ +/* > */ +/* > if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, */ +/* > ( 0 ) N-M N M-N */ +/* > M */ +/* > */ +/* > where R11 is upper triangular, and */ +/* > */ +/* > if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, */ +/* > P-N N ( T21 ) P */ +/* > P */ +/* > */ +/* > where T12 or T21 is upper triangular. */ +/* > */ +/* > In particular, if B is square and nonsingular, the GQR factorization */ +/* > of A and B implicitly gives the QR factorization of inv(B)*A: */ +/* > */ +/* > inv(B)*A = Z**H * (inv(T)*R) */ +/* > */ +/* > where inv(B) denotes the inverse of the matrix B, and Z**H denotes the */ +/* > conjugate transpose of matrix Z. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of rows of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of columns of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of columns of the matrix B. P >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,M) */ +/* > On entry, the N-by-M matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(N,M)-by-M upper trapezoidal matrix R (R is */ +/* > upper triangular if N >= M); the elements below the diagonal, */ +/* > with the array TAUA, represent the unitary matrix Q as a */ +/* > product of f2cmin(N,M) elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUA */ +/* > \verbatim */ +/* > TAUA is COMPLEX*16 array, dimension (f2cmin(N,M)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the unitary matrix Q (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,P) */ +/* > On entry, the N-by-P matrix B. */ +/* > On exit, if N <= P, the upper triangle of the subarray */ +/* > B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */ +/* > if N > P, the elements on and above the (N-P)-th subdiagonal */ +/* > contain the N-by-P upper trapezoidal matrix T; the remaining */ +/* > elements, with the array TAUB, represent the unitary */ +/* > matrix Z as a product of elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUB */ +/* > \verbatim */ +/* > TAUB is COMPLEX*16 array, dimension (f2cmin(N,P)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the unitary matrix Z (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N,M,P). */ +/* > For optimum performance LWORK >= f2cmax(N,M,P)*f2cmax(NB1,NB2,NB3), */ +/* > where NB1 is the optimal blocksize for the QR factorization */ +/* > of an N-by-M matrix, NB2 is the optimal blocksize for the */ +/* > RQ factorization of an N-by-P matrix, and NB3 is the optimal */ +/* > blocksize for a call of ZUNMQR. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16OTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(n,m). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - taua * v * v**H */ +/* > */ +/* > where taua is a complex scalar, and v is a complex vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), */ +/* > and taua in TAUA(i). */ +/* > To form Q explicitly, use LAPACK subroutine ZUNGQR. */ +/* > To use Q to update another matrix, use LAPACK subroutine ZUNMQR. */ +/* > */ +/* > The matrix Z is represented as a product of elementary reflectors */ +/* > */ +/* > Z = H(1) H(2) . . . H(k), where k = f2cmin(n,p). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - taub * v * v**H */ +/* > */ +/* > where taub is a complex scalar, and v is a complex vector with */ +/* > v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in */ +/* > B(n-k+i,1:p-k+i-1), and taub in TAUB(i). */ +/* > To form Z explicitly, use LAPACK subroutine ZUNGRQ. */ +/* > To use Z to update another matrix, use LAPACK subroutine ZUNMRQ. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zggqrf_(integer *n, integer *m, integer *p, + doublecomplex *a, integer *lda, doublecomplex *taua, doublecomplex *b, + integer *ldb, doublecomplex *taub, doublecomplex *work, integer * + lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer lopt, nb; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zgeqrf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ), zgerqf_(integer *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, integer *); + integer nb1, nb2, nb3, lwkopt; + logical lquery; + extern /* Subroutine */ int zunmqr_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --taua; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --taub; + --work; + + /* Function Body */ + *info = 0; + nb1 = ilaenv_(&c__1, "ZGEQRF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb2 = ilaenv_(&c__1, "ZGERQF", " ", n, p, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb3 = ilaenv_(&c__1, "ZUNMQR", " ", n, m, p, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = f2cmax(nb1,nb2); + nb = f2cmax(i__1,nb3); +/* Computing MAX */ + i__1 = f2cmax(*n,*m); + lwkopt = f2cmax(i__1,*p) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*p < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*n), i__1 = f2cmax(i__1,*m); + if (*lwork < f2cmax(i__1,*p) && ! lquery) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGGQRF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* QR factorization of N-by-M matrix A: A = Q*R */ + + zgeqrf_(n, m, &a[a_offset], lda, &taua[1], &work[1], lwork, info); + lopt = (integer) work[1].r; + +/* Update B := Q**H*B. */ + + i__1 = f2cmin(*n,*m); + zunmqr_("Left", "Conjugate Transpose", n, p, &i__1, &a[a_offset], lda, & + taua[1], &b[b_offset], ldb, &work[1], lwork, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[1].r; + lopt = f2cmax(i__1,i__2); + +/* RQ factorization of N-by-P matrix B: B = T*Z. */ + + zgerqf_(n, p, &b[b_offset], ldb, &taub[1], &work[1], lwork, info); +/* Computing MAX */ + i__2 = lopt, i__3 = (integer) work[1].r; + i__1 = f2cmax(i__2,i__3); + work[1].r = (doublereal) i__1, work[1].i = 0.; + + return 0; + +/* End of ZGGQRF */ + +} /* zggqrf_ */ + diff --git a/lapack-netlib/SRC/zggrqf.c b/lapack-netlib/SRC/zggrqf.c new file mode 100644 index 000000000..9b65ee254 --- /dev/null +++ b/lapack-netlib/SRC/zggrqf.c @@ -0,0 +1,721 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGGRQF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGGRQF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, */ +/* LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, P */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), */ +/* $ WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGGRQF computes a generalized RQ factorization of an M-by-N matrix A */ +/* > and a P-by-N matrix B: */ +/* > */ +/* > A = R*Q, B = Z*T*Q, */ +/* > */ +/* > where Q is an N-by-N unitary matrix, Z is a P-by-P unitary */ +/* > matrix, and R and T assume one of the forms: */ +/* > */ +/* > if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, */ +/* > N-M M ( R21 ) N */ +/* > N */ +/* > */ +/* > where R12 or R21 is upper triangular, and */ +/* > */ +/* > if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, */ +/* > ( 0 ) P-N P N-P */ +/* > N */ +/* > */ +/* > where T11 is upper triangular. */ +/* > */ +/* > In particular, if B is square and nonsingular, the GRQ factorization */ +/* > of A and B implicitly gives the RQ factorization of A*inv(B): */ +/* > */ +/* > A*inv(B) = (R*inv(T))*Z**H */ +/* > */ +/* > where inv(B) denotes the inverse of the matrix B, and Z**H denotes the */ +/* > conjugate transpose of the matrix Z. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of rows of the matrix B. P >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, if M <= N, the upper triangle of the subarray */ +/* > A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; */ +/* > if M > N, the elements on and above the (M-N)-th subdiagonal */ +/* > contain the M-by-N upper trapezoidal matrix R; the remaining */ +/* > elements, with the array TAUA, represent the unitary */ +/* > matrix Q as a product of elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUA */ +/* > \verbatim */ +/* > TAUA is COMPLEX*16 array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the unitary matrix Q (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,N) */ +/* > On entry, the P-by-N matrix B. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(P,N)-by-N upper trapezoidal matrix T (T is */ +/* > upper triangular if P >= N); the elements below the diagonal, */ +/* > with the array TAUB, represent the unitary matrix Z as a */ +/* > product of elementary reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,P). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUB */ +/* > \verbatim */ +/* > TAUB is COMPLEX*16 array, dimension (f2cmin(P,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the unitary matrix Z (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N,M,P). */ +/* > For optimum performance LWORK >= f2cmax(N,M,P)*f2cmax(NB1,NB2,NB3), */ +/* > where NB1 is the optimal blocksize for the RQ factorization */ +/* > of an M-by-N matrix, NB2 is the optimal blocksize for the */ +/* > QR factorization of a P-by-N matrix, and NB3 is the optimal */ +/* > blocksize for a call of ZUNMRQ. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO=-i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16OTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - taua * v * v**H */ +/* > */ +/* > where taua is a complex scalar, and v is a complex vector with */ +/* > v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */ +/* > A(m-k+i,1:n-k+i-1), and taua in TAUA(i). */ +/* > To form Q explicitly, use LAPACK subroutine ZUNGRQ. */ +/* > To use Q to update another matrix, use LAPACK subroutine ZUNMRQ. */ +/* > */ +/* > The matrix Z is represented as a product of elementary reflectors */ +/* > */ +/* > Z = H(1) H(2) . . . H(k), where k = f2cmin(p,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - taub * v * v**H */ +/* > */ +/* > where taub is a complex scalar, and v is a complex vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), */ +/* > and taub in TAUB(i). */ +/* > To form Z explicitly, use LAPACK subroutine ZUNGQR. */ +/* > To use Z to update another matrix, use LAPACK subroutine ZUNMQR. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zggrqf_(integer *m, integer *p, integer *n, + doublecomplex *a, integer *lda, doublecomplex *taua, doublecomplex *b, + integer *ldb, doublecomplex *taub, doublecomplex *work, integer * + lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer lopt, nb; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zgeqrf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ), zgerqf_(integer *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, integer *); + integer nb1, nb2, nb3, lwkopt; + logical lquery; + extern /* Subroutine */ int zunmrq_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --taua; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --taub; + --work; + + /* Function Body */ + *info = 0; + nb1 = ilaenv_(&c__1, "ZGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb2 = ilaenv_(&c__1, "ZGEQRF", " ", p, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb3 = ilaenv_(&c__1, "ZUNMRQ", " ", m, n, p, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = f2cmax(nb1,nb2); + nb = f2cmax(i__1,nb3); +/* Computing MAX */ + i__1 = f2cmax(*n,*m); + lwkopt = f2cmax(i__1,*p) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*p < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldb < f2cmax(1,*p)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m), i__1 = f2cmax(i__1,*p); + if (*lwork < f2cmax(i__1,*n) && ! lquery) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGGRQF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* RQ factorization of M-by-N matrix A: A = R*Q */ + + zgerqf_(m, n, &a[a_offset], lda, &taua[1], &work[1], lwork, info); + lopt = (integer) work[1].r; + +/* Update B := B*Q**H */ + + i__1 = f2cmin(*m,*n); +/* Computing MAX */ + i__2 = 1, i__3 = *m - *n + 1; + zunmrq_("Right", "Conjugate Transpose", p, n, &i__1, &a[f2cmax(i__2,i__3) + + a_dim1], lda, &taua[1], &b[b_offset], ldb, &work[1], lwork, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[1].r; + lopt = f2cmax(i__1,i__2); + +/* QR factorization of P-by-N matrix B: B = Z*T */ + + zgeqrf_(p, n, &b[b_offset], ldb, &taub[1], &work[1], lwork, info); +/* Computing MAX */ + i__2 = lopt, i__3 = (integer) work[1].r; + i__1 = f2cmax(i__2,i__3); + work[1].r = (doublereal) i__1, work[1].i = 0.; + + return 0; + +/* End of ZGGRQF */ + +} /* zggrqf_ */ + diff --git a/lapack-netlib/SRC/zggsvd3.c b/lapack-netlib/SRC/zggsvd3.c new file mode 100644 index 000000000..eb59ef649 --- /dev/null +++ b/lapack-netlib/SRC/zggsvd3.c @@ -0,0 +1,947 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGGSVD3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGGSVD3( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, */ +/* LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, */ +/* LWORK, RWORK, IWORK, INFO ) */ + +/* CHARACTER JOBQ, JOBU, JOBV */ +/* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION ALPHA( * ), BETA( * ), RWORK( * ) */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */ +/* $ U( LDU, * ), V( LDV, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGGSVD3 computes the generalized singular value decomposition (GSVD) */ +/* > of an M-by-N complex matrix A and P-by-N complex matrix B: */ +/* > */ +/* > U**H*A*Q = D1*( 0 R ), V**H*B*Q = D2*( 0 R ) */ +/* > */ +/* > where U, V and Q are unitary matrices. */ +/* > Let K+L = the effective numerical rank of the */ +/* > matrix (A**H,B**H)**H, then R is a (K+L)-by-(K+L) nonsingular upper */ +/* > triangular matrix, D1 and D2 are M-by-(K+L) and P-by-(K+L) "diagonal" */ +/* > matrices and of the following structures, respectively: */ +/* > */ +/* > If M-K-L >= 0, */ +/* > */ +/* > K L */ +/* > D1 = K ( I 0 ) */ +/* > L ( 0 C ) */ +/* > M-K-L ( 0 0 ) */ +/* > */ +/* > K L */ +/* > D2 = L ( 0 S ) */ +/* > P-L ( 0 0 ) */ +/* > */ +/* > N-K-L K L */ +/* > ( 0 R ) = K ( 0 R11 R12 ) */ +/* > L ( 0 0 R22 ) */ +/* > where */ +/* > */ +/* > C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), */ +/* > S = diag( BETA(K+1), ... , BETA(K+L) ), */ +/* > C**2 + S**2 = I. */ +/* > */ +/* > R is stored in A(1:K+L,N-K-L+1:N) on exit. */ +/* > */ +/* > If M-K-L < 0, */ +/* > */ +/* > K M-K K+L-M */ +/* > D1 = K ( I 0 0 ) */ +/* > M-K ( 0 C 0 ) */ +/* > */ +/* > K M-K K+L-M */ +/* > D2 = M-K ( 0 S 0 ) */ +/* > K+L-M ( 0 0 I ) */ +/* > P-L ( 0 0 0 ) */ +/* > */ +/* > N-K-L K M-K K+L-M */ +/* > ( 0 R ) = K ( 0 R11 R12 R13 ) */ +/* > M-K ( 0 0 R22 R23 ) */ +/* > K+L-M ( 0 0 0 R33 ) */ +/* > */ +/* > where */ +/* > */ +/* > C = diag( ALPHA(K+1), ... , ALPHA(M) ), */ +/* > S = diag( BETA(K+1), ... , BETA(M) ), */ +/* > C**2 + S**2 = I. */ +/* > */ +/* > (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored */ +/* > ( 0 R22 R23 ) */ +/* > in B(M-K+1:L,N+M-K-L+1:N) on exit. */ +/* > */ +/* > The routine computes C, S, R, and optionally the unitary */ +/* > transformation matrices U, V and Q. */ +/* > */ +/* > In particular, if B is an N-by-N nonsingular matrix, then the GSVD of */ +/* > A and B implicitly gives the SVD of A*inv(B): */ +/* > A*inv(B) = U*(D1*inv(D2))*V**H. */ +/* > If ( A**H,B**H)**H has orthonormal columns, then the GSVD of A and B is also */ +/* > equal to the CS decomposition of A and B. Furthermore, the GSVD can */ +/* > be used to derive the solution of the eigenvalue problem: */ +/* > A**H*A x = lambda* B**H*B x. */ +/* > In some literature, the GSVD of A and B is presented in the form */ +/* > U**H*A*X = ( 0 D1 ), V**H*B*X = ( 0 D2 ) */ +/* > where U and V are orthogonal and X is nonsingular, and D1 and D2 are */ +/* > ``diagonal''. The former GSVD form can be converted to the latter */ +/* > form by taking the nonsingular matrix X as */ +/* > */ +/* > X = Q*( I 0 ) */ +/* > ( 0 inv(R) ) */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > = 'U': Unitary matrix U is computed; */ +/* > = 'N': U is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > = 'V': Unitary matrix V is computed; */ +/* > = 'N': V is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBQ */ +/* > \verbatim */ +/* > JOBQ is CHARACTER*1 */ +/* > = 'Q': Unitary matrix Q is computed; */ +/* > = 'N': Q is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of rows of the matrix B. P >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[out] L */ +/* > \verbatim */ +/* > L is INTEGER */ +/* > */ +/* > On exit, K and L specify the dimension of the subblocks */ +/* > described in Purpose. */ +/* > K + L = effective numerical rank of (A**H,B**H)**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, A contains the triangular matrix R, or part of R. */ +/* > See Purpose for details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,N) */ +/* > On entry, the P-by-N matrix B. */ +/* > On exit, B contains part of the triangular matrix R if */ +/* > M-K-L < 0. See Purpose for details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,P). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is DOUBLE PRECISION array, dimension (N) */ +/* > */ +/* > On exit, ALPHA and BETA contain the generalized singular */ +/* > value pairs of A and B; */ +/* > ALPHA(1:K) = 1, */ +/* > BETA(1:K) = 0, */ +/* > and if M-K-L >= 0, */ +/* > ALPHA(K+1:K+L) = C, */ +/* > BETA(K+1:K+L) = S, */ +/* > or if M-K-L < 0, */ +/* > ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 */ +/* > BETA(K+1:M) =S, BETA(M+1:K+L) =1 */ +/* > and */ +/* > ALPHA(K+L+1:N) = 0 */ +/* > BETA(K+L+1:N) = 0 */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is COMPLEX*16 array, dimension (LDU,M) */ +/* > If JOBU = 'U', U contains the M-by-M unitary matrix U. */ +/* > If JOBU = 'N', U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= f2cmax(1,M) if */ +/* > JOBU = 'U'; LDU >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] V */ +/* > \verbatim */ +/* > V is COMPLEX*16 array, dimension (LDV,P) */ +/* > If JOBV = 'V', V contains the P-by-P unitary matrix V. */ +/* > If JOBV = 'N', V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. LDV >= f2cmax(1,P) if */ +/* > JOBV = 'V'; LDV >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX*16 array, dimension (LDQ,N) */ +/* > If JOBQ = 'Q', Q contains the N-by-N unitary matrix Q. */ +/* > If JOBQ = 'N', Q is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. LDQ >= f2cmax(1,N) if */ +/* > JOBQ = 'Q'; LDQ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > On exit, IWORK stores the sorting information. More */ +/* > precisely, the following loop will sort ALPHA */ +/* > for I = K+1, f2cmin(M,K+L) */ +/* > swap ALPHA(I) and ALPHA(IWORK(I)) */ +/* > endfor */ +/* > such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = 1, the Jacobi-type procedure failed to */ +/* > converge. For further details, see subroutine ZTGSJA. */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > TOLA DOUBLE PRECISION */ +/* > TOLB DOUBLE PRECISION */ +/* > TOLA and TOLB are the thresholds to determine the effective */ +/* > rank of (A**H,B**H)**H. Generally, they are set to */ +/* > TOLA = MAX(M,N)*norm(A)*MACHEPS, */ +/* > TOLB = MAX(P,N)*norm(B)*MACHEPS. */ +/* > The size of TOLA and TOLB may affect the size of backward */ +/* > errors of the decomposition. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date August 2015 */ + +/* > \ingroup complex16GEsing */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Huan Ren, Computer Science Division, University of */ +/* > California at Berkeley, USA */ +/* > */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > ZGGSVD3 replaces the deprecated subroutine ZGGSVD. */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zggsvd3_(char *jobu, char *jobv, char *jobq, integer *m, + integer *n, integer *p, integer *k, integer *l, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb, doublereal *alpha, + doublereal *beta, doublecomplex *u, integer *ldu, doublecomplex *v, + integer *ldv, doublecomplex *q, integer *ldq, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, + u_offset, v_dim1, v_offset, i__1, i__2; + doublecomplex z__1; + + /* Local variables */ + integer ibnd; + doublereal tola; + integer isub; + doublereal tolb, unfl, temp, smax; + integer ncallmycycle, i__, j; + extern logical lsame_(char *, char *); + doublereal anorm, bnorm; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical wantq, wantu, wantv; + extern doublereal dlamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + extern /* Subroutine */ int ztgsja_(char *, char *, char *, integer *, + integer *, integer *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int zggsvp3_(char *, char *, char *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, doublereal *, + doublecomplex *, doublecomplex *, integer *, integer *); + doublereal ulp; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* August 2015 */ + + +/* ===================================================================== */ + + +/* Decode and test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alpha; + --beta; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantu = lsame_(jobu, "U"); + wantv = lsame_(jobv, "V"); + wantq = lsame_(jobq, "Q"); + lquery = *lwork == -1; + lwkopt = 1; + +/* Test the input arguments */ + + *info = 0; + if (! (wantu || lsame_(jobu, "N"))) { + *info = -1; + } else if (! (wantv || lsame_(jobv, "N"))) { + *info = -2; + } else if (! (wantq || lsame_(jobq, "N"))) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*p < 0) { + *info = -6; + } else if (*lda < f2cmax(1,*m)) { + *info = -10; + } else if (*ldb < f2cmax(1,*p)) { + *info = -12; + } else if (*ldu < 1 || wantu && *ldu < *m) { + *info = -16; + } else if (*ldv < 1 || wantv && *ldv < *p) { + *info = -18; + } else if (*ldq < 1 || wantq && *ldq < *n) { + *info = -20; + } else if (*lwork < 1 && ! lquery) { + *info = -24; + } + +/* Compute workspace */ + + if (*info == 0) { + zggsvp3_(jobu, jobv, jobq, m, p, n, &a[a_offset], lda, &b[b_offset], + ldb, &tola, &tolb, k, l, &u[u_offset], ldu, &v[v_offset], ldv, + &q[q_offset], ldq, &iwork[1], &rwork[1], &work[1], &work[1], + &c_n1, info); + lwkopt = *n + (integer) work[1].r; +/* Computing MAX */ + i__1 = *n << 1; + lwkopt = f2cmax(i__1,lwkopt); + lwkopt = f2cmax(1,lwkopt); + z__1.r = (doublereal) lwkopt, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGGSVD3", &i__1, (ftnlen)7); + return 0; + } + if (lquery) { + return 0; + } + +/* Compute the Frobenius norm of matrices A and B */ + + anorm = zlange_("1", m, n, &a[a_offset], lda, &rwork[1]); + bnorm = zlange_("1", p, n, &b[b_offset], ldb, &rwork[1]); + +/* Get machine precision and set up threshold for determining */ +/* the effective numerical rank of the matrices A and B. */ + + ulp = dlamch_("Precision"); + unfl = dlamch_("Safe Minimum"); + tola = f2cmax(*m,*n) * f2cmax(anorm,unfl) * ulp; + tolb = f2cmax(*p,*n) * f2cmax(bnorm,unfl) * ulp; + + i__1 = *lwork - *n; + zggsvp3_(jobu, jobv, jobq, m, p, n, &a[a_offset], lda, &b[b_offset], ldb, + &tola, &tolb, k, l, &u[u_offset], ldu, &v[v_offset], ldv, &q[ + q_offset], ldq, &iwork[1], &rwork[1], &work[1], &work[*n + 1], & + i__1, info); + +/* Compute the GSVD of two upper "triangular" matrices */ + + ztgsja_(jobu, jobv, jobq, m, p, n, k, l, &a[a_offset], lda, &b[b_offset], + ldb, &tola, &tolb, &alpha[1], &beta[1], &u[u_offset], ldu, &v[ + v_offset], ldv, &q[q_offset], ldq, &work[1], &ncallmycycle, info); + +/* Sort the singular values and store the pivot indices in IWORK */ +/* Copy ALPHA to RWORK, then sort ALPHA in RWORK */ + + dcopy_(n, &alpha[1], &c__1, &rwork[1], &c__1); +/* Computing MIN */ + i__1 = *l, i__2 = *m - *k; + ibnd = f2cmin(i__1,i__2); + i__1 = ibnd; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for largest ALPHA(K+I) */ + + isub = i__; + smax = rwork[*k + i__]; + i__2 = ibnd; + for (j = i__ + 1; j <= i__2; ++j) { + temp = rwork[*k + j]; + if (temp > smax) { + isub = j; + smax = temp; + } +/* L10: */ + } + if (isub != i__) { + rwork[*k + isub] = rwork[*k + i__]; + rwork[*k + i__] = smax; + iwork[*k + i__] = *k + isub; + } else { + iwork[*k + i__] = *k + i__; + } +/* L20: */ + } + + z__1.r = (doublereal) lwkopt, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; + +/* End of ZGGSVD3 */ + +} /* zggsvd3_ */ + diff --git a/lapack-netlib/SRC/zggsvp3.c b/lapack-netlib/SRC/zggsvp3.c new file mode 100644 index 000000000..a117c23d4 --- /dev/null +++ b/lapack-netlib/SRC/zggsvp3.c @@ -0,0 +1,1073 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGGSVP3 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGGSVP3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, */ +/* TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, */ +/* IWORK, RWORK, TAU, WORK, LWORK, INFO ) */ + +/* CHARACTER JOBQ, JOBU, JOBV */ +/* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK */ +/* DOUBLE PRECISION TOLA, TOLB */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ) */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */ +/* $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGGSVP3 computes unitary matrices U, V and Q such that */ +/* > */ +/* > N-K-L K L */ +/* > U**H*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; */ +/* > L ( 0 0 A23 ) */ +/* > M-K-L ( 0 0 0 ) */ +/* > */ +/* > N-K-L K L */ +/* > = K ( 0 A12 A13 ) if M-K-L < 0; */ +/* > M-K ( 0 0 A23 ) */ +/* > */ +/* > N-K-L K L */ +/* > V**H*B*Q = L ( 0 0 B13 ) */ +/* > P-L ( 0 0 0 ) */ +/* > */ +/* > where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular */ +/* > upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, */ +/* > otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective */ +/* > numerical rank of the (M+P)-by-N matrix (A**H,B**H)**H. */ +/* > */ +/* > This decomposition is the preprocessing step for computing the */ +/* > Generalized Singular Value Decomposition (GSVD), see subroutine */ +/* > ZGGSVD3. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > = 'U': Unitary matrix U is computed; */ +/* > = 'N': U is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > = 'V': Unitary matrix V is computed; */ +/* > = 'N': V is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBQ */ +/* > \verbatim */ +/* > JOBQ is CHARACTER*1 */ +/* > = 'Q': Unitary matrix Q is computed; */ +/* > = 'N': Q is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of rows of the matrix B. P >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, A contains the triangular (or trapezoidal) matrix */ +/* > described in the Purpose section. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,N) */ +/* > On entry, the P-by-N matrix B. */ +/* > On exit, B contains the triangular matrix described in */ +/* > the Purpose section. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,P). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TOLA */ +/* > \verbatim */ +/* > TOLA is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TOLB */ +/* > \verbatim */ +/* > TOLB is DOUBLE PRECISION */ +/* > */ +/* > TOLA and TOLB are the thresholds to determine the effective */ +/* > numerical rank of matrix B and a subblock of A. Generally, */ +/* > they are set to */ +/* > TOLA = MAX(M,N)*norm(A)*MAZHEPS, */ +/* > TOLB = MAX(P,N)*norm(B)*MAZHEPS. */ +/* > The size of TOLA and TOLB may affect the size of backward */ +/* > errors of the decomposition. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[out] L */ +/* > \verbatim */ +/* > L is INTEGER */ +/* > */ +/* > On exit, K and L specify the dimension of the subblocks */ +/* > described in Purpose section. */ +/* > K + L = effective numerical rank of (A**H,B**H)**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is COMPLEX*16 array, dimension (LDU,M) */ +/* > If JOBU = 'U', U contains the unitary matrix U. */ +/* > If JOBU = 'N', U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= f2cmax(1,M) if */ +/* > JOBU = 'U'; LDU >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] V */ +/* > \verbatim */ +/* > V is COMPLEX*16 array, dimension (LDV,P) */ +/* > If JOBV = 'V', V contains the unitary matrix V. */ +/* > If JOBV = 'N', V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. LDV >= f2cmax(1,P) if */ +/* > JOBV = 'V'; LDV >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX*16 array, dimension (LDQ,N) */ +/* > If JOBQ = 'Q', Q contains the unitary matrix Q. */ +/* > If JOBQ = 'N', Q is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. LDQ >= f2cmax(1,N) if */ +/* > JOBQ = 'Q'; LDQ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date August 2015 */ + +/* > \ingroup complex16OTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ + +/* > \verbatim */ +/* > */ +/* > The subroutine uses LAPACK subroutine ZGEQP3 for the QR factorization */ +/* > with column pivoting to detect the effective numerical rank of the */ +/* > a matrix. It may be replaced by a better rank determination strategy. */ +/* > */ +/* > ZGGSVP3 replaces the deprecated subroutine ZGGSVP. */ +/* > */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zggsvp3_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, doublecomplex *a, integer *lda, doublecomplex + *b, integer *ldb, doublereal *tola, doublereal *tolb, integer *k, + integer *l, doublecomplex *u, integer *ldu, doublecomplex *v, integer + *ldv, doublecomplex *q, integer *ldq, integer *iwork, doublereal * + rwork, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, + u_offset, v_dim1, v_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + integer i__, j; + extern logical lsame_(char *, char *); + logical wantq, wantu, wantv; + extern /* Subroutine */ int zgeqp3_(integer *, integer *, doublecomplex *, + integer *, integer *, doublecomplex *, doublecomplex *, integer * + , doublereal *, integer *), zgeqr2_(integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), zgerq2_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), zung2r_( + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *), zunm2r_(char *, + char *, integer *, integer *, integer *, doublecomplex *, integer + *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *), zunmr2_(char *, char *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *, ftnlen), zlacpy_(char *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *); + logical forwrd; + extern /* Subroutine */ int zlaset_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *), zlapmt_(logical *, integer *, integer *, doublecomplex *, + integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* August 2015 */ + + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + --iwork; + --rwork; + --tau; + --work; + + /* Function Body */ + wantu = lsame_(jobu, "U"); + wantv = lsame_(jobv, "V"); + wantq = lsame_(jobq, "Q"); + forwrd = TRUE_; + lquery = *lwork == -1; + lwkopt = 1; + +/* Test the input arguments */ + + *info = 0; + if (! (wantu || lsame_(jobu, "N"))) { + *info = -1; + } else if (! (wantv || lsame_(jobv, "N"))) { + *info = -2; + } else if (! (wantq || lsame_(jobq, "N"))) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*p < 0) { + *info = -5; + } else if (*n < 0) { + *info = -6; + } else if (*lda < f2cmax(1,*m)) { + *info = -8; + } else if (*ldb < f2cmax(1,*p)) { + *info = -10; + } else if (*ldu < 1 || wantu && *ldu < *m) { + *info = -16; + } else if (*ldv < 1 || wantv && *ldv < *p) { + *info = -18; + } else if (*ldq < 1 || wantq && *ldq < *n) { + *info = -20; + } else if (*lwork < 1 && ! lquery) { + *info = -24; + } + +/* Compute workspace */ + + if (*info == 0) { + zgeqp3_(p, n, &b[b_offset], ldb, &iwork[1], &tau[1], &work[1], &c_n1, + &rwork[1], info); + lwkopt = (integer) work[1].r; + if (wantv) { + lwkopt = f2cmax(lwkopt,*p); + } +/* Computing MAX */ + i__1 = lwkopt, i__2 = f2cmin(*n,*p); + lwkopt = f2cmax(i__1,i__2); + lwkopt = f2cmax(lwkopt,*m); + if (wantq) { + lwkopt = f2cmax(lwkopt,*n); + } + zgeqp3_(m, n, &a[a_offset], lda, &iwork[1], &tau[1], &work[1], &c_n1, + &rwork[1], info); +/* Computing MAX */ + i__1 = lwkopt, i__2 = (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + lwkopt = f2cmax(1,lwkopt); + z__1.r = (doublereal) lwkopt, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGGSVP3", &i__1, (ftnlen)7); + return 0; + } + if (lquery) { + return 0; + } + +/* QR with column pivoting of B: B*P = V*( S11 S12 ) */ +/* ( 0 0 ) */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[i__] = 0; +/* L10: */ + } + zgeqp3_(p, n, &b[b_offset], ldb, &iwork[1], &tau[1], &work[1], lwork, & + rwork[1], info); + +/* Update A := A*P */ + + zlapmt_(&forwrd, m, n, &a[a_offset], lda, &iwork[1]); + +/* Determine the effective rank of matrix B. */ + + *l = 0; + i__1 = f2cmin(*p,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + if (z_abs(&b[i__ + i__ * b_dim1]) > *tolb) { + ++(*l); + } +/* L20: */ + } + + if (wantv) { + +/* Copy the details of V, and form V. */ + + zlaset_("Full", p, p, &c_b1, &c_b1, &v[v_offset], ldv); + if (*p > 1) { + i__1 = *p - 1; + zlacpy_("Lower", &i__1, n, &b[b_dim1 + 2], ldb, &v[v_dim1 + 2], + ldv); + } + i__1 = f2cmin(*p,*n); + zung2r_(p, p, &i__1, &v[v_offset], ldv, &tau[1], &work[1], info); + } + +/* Clean up B */ + + i__1 = *l - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *l; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; +/* L30: */ + } +/* L40: */ + } + if (*p > *l) { + i__1 = *p - *l; + zlaset_("Full", &i__1, n, &c_b1, &c_b1, &b[*l + 1 + b_dim1], ldb); + } + + if (wantq) { + +/* Set Q = I and Update Q := Q*P */ + + zlaset_("Full", n, n, &c_b1, &c_b2, &q[q_offset], ldq); + zlapmt_(&forwrd, n, n, &q[q_offset], ldq, &iwork[1]); + } + + if (*p >= *l && *n != *l) { + +/* RQ factorization of ( S11 S12 ) = ( 0 S12 )*Z */ + + zgerq2_(l, n, &b[b_offset], ldb, &tau[1], &work[1], info); + +/* Update A := A*Z**H */ + + zunmr2_("Right", "Conjugate transpose", m, n, l, &b[b_offset], ldb, & + tau[1], &a[a_offset], lda, &work[1], info); + if (wantq) { + +/* Update Q := Q*Z**H */ + + zunmr2_("Right", "Conjugate transpose", n, n, l, &b[b_offset], + ldb, &tau[1], &q[q_offset], ldq, &work[1], info); + } + +/* Clean up B */ + + i__1 = *n - *l; + zlaset_("Full", l, &i__1, &c_b1, &c_b1, &b[b_offset], ldb); + i__1 = *n; + for (j = *n - *l + 1; j <= i__1; ++j) { + i__2 = *l; + for (i__ = j - *n + *l + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; +/* L50: */ + } +/* L60: */ + } + + } + +/* Let N-L L */ +/* A = ( A11 A12 ) M, */ + +/* then the following does the complete QR decomposition of A11: */ + +/* A11 = U*( 0 T12 )*P1**H */ +/* ( 0 0 ) */ + + i__1 = *n - *l; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[i__] = 0; +/* L70: */ + } + i__1 = *n - *l; + zgeqp3_(m, &i__1, &a[a_offset], lda, &iwork[1], &tau[1], &work[1], lwork, + &rwork[1], info); + +/* Determine the effective rank of A11 */ + + *k = 0; +/* Computing MIN */ + i__2 = *m, i__3 = *n - *l; + i__1 = f2cmin(i__2,i__3); + for (i__ = 1; i__ <= i__1; ++i__) { + if (z_abs(&a[i__ + i__ * a_dim1]) > *tola) { + ++(*k); + } +/* L80: */ + } + +/* Update A12 := U**H*A12, where A12 = A( 1:M, N-L+1:N ) */ + +/* Computing MIN */ + i__2 = *m, i__3 = *n - *l; + i__1 = f2cmin(i__2,i__3); + zunm2r_("Left", "Conjugate transpose", m, l, &i__1, &a[a_offset], lda, & + tau[1], &a[(*n - *l + 1) * a_dim1 + 1], lda, &work[1], info); + + if (wantu) { + +/* Copy the details of U, and form U */ + + zlaset_("Full", m, m, &c_b1, &c_b1, &u[u_offset], ldu); + if (*m > 1) { + i__1 = *m - 1; + i__2 = *n - *l; + zlacpy_("Lower", &i__1, &i__2, &a[a_dim1 + 2], lda, &u[u_dim1 + 2] + , ldu); + } +/* Computing MIN */ + i__2 = *m, i__3 = *n - *l; + i__1 = f2cmin(i__2,i__3); + zung2r_(m, m, &i__1, &u[u_offset], ldu, &tau[1], &work[1], info); + } + + if (wantq) { + +/* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 */ + + i__1 = *n - *l; + zlapmt_(&forwrd, n, &i__1, &q[q_offset], ldq, &iwork[1]); + } + +/* Clean up A: set the strictly lower triangular part of */ +/* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. */ + + i__1 = *k - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L90: */ + } +/* L100: */ + } + if (*m > *k) { + i__1 = *m - *k; + i__2 = *n - *l; + zlaset_("Full", &i__1, &i__2, &c_b1, &c_b1, &a[*k + 1 + a_dim1], lda); + } + + if (*n - *l > *k) { + +/* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 */ + + i__1 = *n - *l; + zgerq2_(k, &i__1, &a[a_offset], lda, &tau[1], &work[1], info); + + if (wantq) { + +/* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1**H */ + + i__1 = *n - *l; + zunmr2_("Right", "Conjugate transpose", n, &i__1, k, &a[a_offset], + lda, &tau[1], &q[q_offset], ldq, &work[1], info); + } + +/* Clean up A */ + + i__1 = *n - *l - *k; + zlaset_("Full", k, &i__1, &c_b1, &c_b1, &a[a_offset], lda); + i__1 = *n - *l; + for (j = *n - *l - *k + 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = j - *n + *l + *k + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L110: */ + } +/* L120: */ + } + + } + + if (*m > *k) { + +/* QR factorization of A( K+1:M,N-L+1:N ) */ + + i__1 = *m - *k; + zgeqr2_(&i__1, l, &a[*k + 1 + (*n - *l + 1) * a_dim1], lda, &tau[1], & + work[1], info); + + if (wantu) { + +/* Update U(:,K+1:M) := U(:,K+1:M)*U1 */ + + i__1 = *m - *k; +/* Computing MIN */ + i__3 = *m - *k; + i__2 = f2cmin(i__3,*l); + zunm2r_("Right", "No transpose", m, &i__1, &i__2, &a[*k + 1 + (*n + - *l + 1) * a_dim1], lda, &tau[1], &u[(*k + 1) * u_dim1 + + 1], ldu, &work[1], info); + } + +/* Clean up */ + + i__1 = *n; + for (j = *n - *l + 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j - *n + *k + *l + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L130: */ + } +/* L140: */ + } + + } + + z__1.r = (doublereal) lwkopt, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; + +/* End of ZGGSVP3 */ + +} /* zggsvp3_ */ + diff --git a/lapack-netlib/SRC/zgsvj0.c b/lapack-netlib/SRC/zgsvj0.c new file mode 100644 index 000000000..a6542cb66 --- /dev/null +++ b/lapack-netlib/SRC/zgsvj0.c @@ -0,0 +1,1544 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGSVJ0 pre-processor for the routine zgesvj. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGSVJ0 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, */ +/* SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP */ +/* DOUBLE PRECISION EPS, SFMIN, TOL */ +/* CHARACTER*1 JOBV */ +/* COMPLEX*16 A( LDA, * ), D( N ), V( LDV, * ), WORK( LWORK ) */ +/* DOUBLE PRECISION SVA( N ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGSVJ0 is called from ZGESVJ as a pre-processor and that is its main */ +/* > purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but */ +/* > it does not check convergence (stopping criterion). Few tuning */ +/* > parameters (marked by [TP]) are available for the implementer. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > Specifies whether the output from this procedure is used */ +/* > to compute the matrix V: */ +/* > = 'V': the product of the Jacobi rotations is accumulated */ +/* > by postmulyiplying the N-by-N array V. */ +/* > (See the description of V.) */ +/* > = 'A': the product of the Jacobi rotations is accumulated */ +/* > by postmulyiplying the MV-by-N array V. */ +/* > (See the descriptions of MV and V.) */ +/* > = 'N': the Jacobi rotations are not accumulated. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. */ +/* > M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, M-by-N matrix A, such that A*diag(D) represents */ +/* > the input matrix. */ +/* > On exit, */ +/* > A_onexit * diag(D_onexit) represents the input matrix A*diag(D) */ +/* > post-multiplied by a sequence of Jacobi rotations, where the */ +/* > rotation threshold and the total number of sweeps are given in */ +/* > TOL and NSWEEP, respectively. */ +/* > (See the descriptions of D, TOL and NSWEEP.) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is COMPLEX*16 array, dimension (N) */ +/* > The array D accumulates the scaling factors from the complex scaled */ +/* > Jacobi rotations. */ +/* > On entry, A*diag(D) represents the input matrix. */ +/* > On exit, A_onexit*diag(D_onexit) represents the input matrix */ +/* > post-multiplied by a sequence of Jacobi rotations, where the */ +/* > rotation threshold and the total number of sweeps are given in */ +/* > TOL and NSWEEP, respectively. */ +/* > (See the descriptions of A, TOL and NSWEEP.) */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] SVA */ +/* > \verbatim */ +/* > SVA is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, SVA contains the Euclidean norms of the columns of */ +/* > the matrix A*diag(D). */ +/* > On exit, SVA contains the Euclidean norms of the columns of */ +/* > the matrix A_onexit*diag(D_onexit). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MV */ +/* > \verbatim */ +/* > MV is INTEGER */ +/* > If JOBV = 'A', then MV rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'N', then MV is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] V */ +/* > \verbatim */ +/* > V is COMPLEX*16 array, dimension (LDV,N) */ +/* > If JOBV = 'V' then N rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'A' then MV rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'N', then V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V, LDV >= 1. */ +/* > If JOBV = 'V', LDV >= N. */ +/* > If JOBV = 'A', LDV >= MV. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] EPS */ +/* > \verbatim */ +/* > EPS is DOUBLE PRECISION */ +/* > EPS = DLAMCH('Epsilon') */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SFMIN */ +/* > \verbatim */ +/* > SFMIN is DOUBLE PRECISION */ +/* > SFMIN = DLAMCH('Safe Minimum') */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TOL */ +/* > \verbatim */ +/* > TOL is DOUBLE PRECISION */ +/* > TOL is the threshold for Jacobi rotations. For a pair */ +/* > A(:,p), A(:,q) of pivot columns, the Jacobi rotation is */ +/* > applied only if ABS(COS(angle(A(:,p),A(:,q)))) > TOL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NSWEEP */ +/* > \verbatim */ +/* > NSWEEP is INTEGER */ +/* > NSWEEP is the number of sweeps of Jacobi rotations to be */ +/* > performed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > LWORK is the dimension of WORK. LWORK >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, then the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16OTHERcomputational */ +/* > */ +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > ZGSVJ0 is used just to enable ZGESVJ to call a simplified version of */ +/* > itself to work on a submatrix of the original matrix. */ +/* > */ +/* > Contributor: */ +/* ============= */ +/* > */ +/* > Zlatko Drmac (Zagreb, Croatia) */ +/* > */ +/* > \par Bugs, Examples and Comments: */ +/* ============================ */ +/* > */ +/* > Please report all bugs and send interesting test examples and comments to */ +/* > drmac@math.hr. Thank you. */ + +/* ===================================================================== */ +/* Subroutine */ int zgsvj0_(char *jobv, integer *m, integer *n, + doublecomplex *a, integer *lda, doublecomplex *d__, doublereal *sva, + integer *mv, doublecomplex *v, integer *ldv, doublereal *eps, + doublereal *sfmin, doublereal *tol, integer *nsweep, doublecomplex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5, + i__6, i__7; + doublereal d__1, d__2; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + doublereal aapp; + doublecomplex aapq; + doublereal aaqq; + integer ierr; + doublereal bigtheta; + doublecomplex ompq; + integer pskipped; + extern /* Subroutine */ int zrot_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *); + doublereal aapp0, aapq1, temp1; + integer i__, p, q; + doublereal t, apoaq, aqoap; + extern logical lsame_(char *, char *); + doublereal theta, small; + logical applv, rsvec; + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + logical rotok; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zswap_(integer *, doublecomplex *, + integer *, doublecomplex *, integer *), zaxpy_(integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *); + doublereal rootsfmin; + extern doublereal dznrm2_(integer *, doublecomplex *, integer *); + doublereal cs, sn; + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer ijblsk, swband, blskip; + doublereal mxaapq; + extern /* Subroutine */ int zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex *, + integer *, integer *); + doublereal thsign, mxsinj; + integer ir1; + extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, + doublereal *, doublereal *); + integer emptsw, notrot, iswrot, jbc; + doublereal big; + integer kbl, lkahead, igl, ibr, jgl, nbl, mvl; + doublereal rootbig, rooteps; + integer rowskip; + doublereal roottol; + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + +/* from BLAS */ +/* from LAPACK */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --sva; + --d__; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + --work; + + /* Function Body */ + applv = lsame_(jobv, "A"); + rsvec = lsame_(jobv, "V"); + if (! (rsvec || applv || lsame_(jobv, "N"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || *n > *m) { + *info = -3; + } else if (*lda < *m) { + *info = -5; + } else if ((rsvec || applv) && *mv < 0) { + *info = -8; + } else if (rsvec && *ldv < *n || applv && *ldv < *mv) { + *info = -10; + } else if (*tol <= *eps) { + *info = -13; + } else if (*nsweep < 0) { + *info = -14; + } else if (*lwork < *m) { + *info = -16; + } else { + *info = 0; + } + +/* #:( */ + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGSVJ0", &i__1, (ftnlen)6); + return 0; + } + + if (rsvec) { + mvl = *n; + } else if (applv) { + mvl = *mv; + } + rsvec = rsvec || applv; + rooteps = sqrt(*eps); + rootsfmin = sqrt(*sfmin); + small = *sfmin / *eps; + big = 1. / *sfmin; + rootbig = 1. / rootsfmin; + bigtheta = 1. / rooteps; + roottol = sqrt(*tol); + + + emptsw = *n * (*n - 1) / 2; + notrot = 0; + + + swband = 0; +/* [TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective */ +/* if ZGESVJ is used as a computational routine in the preconditioned */ +/* Jacobi SVD algorithm ZGEJSV. For sweeps i=1:SWBAND the procedure */ +/* works on pivots inside a band-like region around the diagonal. */ +/* The boundaries are determined dynamically, based on the number of */ +/* pivots above a threshold. */ + + kbl = f2cmin(8,*n); +/* [TP] KBL is a tuning parameter that defines the tile size in the */ +/* tiling of the p-q loops of pivot pairs. In general, an optimal */ +/* value of KBL depends on the matrix dimensions and on the */ +/* parameters of the computer's memory. */ + + nbl = *n / kbl; + if (nbl * kbl != *n) { + ++nbl; + } + +/* Computing 2nd power */ + i__1 = kbl; + blskip = i__1 * i__1; +/* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */ + + rowskip = f2cmin(5,kbl); +/* [TP] ROWSKIP is a tuning parameter. */ + + lkahead = 1; +/* [TP] LKAHEAD is a tuning parameter. */ + +/* Quasi block transformations, using the lower (upper) triangular */ +/* structure of the input matrix. The quasi-block-cycling usually */ +/* invokes cubic convergence. Big part of this cycle is done inside */ +/* canonical subspaces of dimensions less than M. */ + + + + i__1 = *nsweep; + for (i__ = 1; i__ <= i__1; ++i__) { + + + mxaapq = 0.; + mxsinj = 0.; + iswrot = 0; + + notrot = 0; + pskipped = 0; + +/* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs */ +/* 1 <= p < q <= N. This is the first step toward a blocked implementation */ +/* of the rotations. New implementation, based on block transformations, */ +/* is under development. */ + + i__2 = nbl; + for (ibr = 1; ibr <= i__2; ++ibr) { + + igl = (ibr - 1) * kbl + 1; + +/* Computing MIN */ + i__4 = lkahead, i__5 = nbl - ibr; + i__3 = f2cmin(i__4,i__5); + for (ir1 = 0; ir1 <= i__3; ++ir1) { + + igl += ir1 * kbl; + +/* Computing MIN */ + i__5 = igl + kbl - 1, i__6 = *n - 1; + i__4 = f2cmin(i__5,i__6); + for (p = igl; p <= i__4; ++p) { + + + i__5 = *n - p + 1; + q = idamax_(&i__5, &sva[p], &c__1) + p - 1; + if (p != q) { + zswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + + 1], &c__1); + if (rsvec) { + zswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1); + } + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + i__5 = p; + aapq.r = d__[i__5].r, aapq.i = d__[i__5].i; + i__5 = p; + i__6 = q; + d__[i__5].r = d__[i__6].r, d__[i__5].i = d__[i__6].i; + i__5 = q; + d__[i__5].r = aapq.r, d__[i__5].i = aapq.i; + } + + if (ir1 == 0) { + +/* Column norms are periodically updated by explicit */ +/* norm computation. */ +/* Caveat: */ +/* Unfortunately, some BLAS implementations compute SNCRM2(M,A(1,p),1) */ +/* as SQRT(S=ZDOTC(M,A(1,p),1,A(1,p),1)), which may cause the result to */ +/* overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and to */ +/* underflow for ||A(:,p)||_2 < SQRT(underflow_threshold). */ +/* Hence, DZNRM2 cannot be trusted, not even in the case when */ +/* the true norm is far from the under(over)flow boundaries. */ +/* If properly implemented DZNRM2 is available, the IF-THEN-ELSE-END IF */ +/* below should be replaced with "AAPP = DZNRM2( M, A(1,p), 1 )". */ + + if (sva[p] < rootbig && sva[p] > rootsfmin) { + sva[p] = dznrm2_(m, &a[p * a_dim1 + 1], &c__1); + } else { + temp1 = 0.; + aapp = 1.; + zlassq_(m, &a[p * a_dim1 + 1], &c__1, &temp1, & + aapp); + sva[p] = temp1 * sqrt(aapp); + } + aapp = sva[p]; + } else { + aapp = sva[p]; + } + + if (aapp > 0.) { + + pskipped = 0; + +/* Computing MIN */ + i__6 = igl + kbl - 1; + i__5 = f2cmin(i__6,*n); + for (q = p + 1; q <= i__5; ++q) { + + aaqq = sva[q]; + + if (aaqq > 0.) { + + aapp0 = aapp; + if (aaqq >= 1.) { + rotok = small * aapp <= aaqq; + if (aapp < big / aaqq) { + zdotc_(&z__3, m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + z__2.r = z__3.r / aaqq, z__2.i = + z__3.i / aaqq; + z__1.r = z__2.r / aapp, z__1.i = + z__2.i / aapp; + aapq.r = z__1.r, aapq.i = z__1.i; + } else { + zcopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[1], &c__1); + zlascl_("G", &c__0, &c__0, &aapp, & + c_b27, m, &c__1, &work[1], + lda, &ierr); + zdotc_(&z__2, m, &work[1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + z__1.r = z__2.r / aaqq, z__1.i = + z__2.i / aaqq; + aapq.r = z__1.r, aapq.i = z__1.i; + } + } else { + rotok = aapp <= aaqq / small; + if (aapp > small / aaqq) { + zdotc_(&z__3, m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + z__2.r = z__3.r / aapp, z__2.i = + z__3.i / aapp; + z__1.r = z__2.r / aaqq, z__1.i = + z__2.i / aaqq; + aapq.r = z__1.r, aapq.i = z__1.i; + } else { + zcopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[1], &c__1); + zlascl_("G", &c__0, &c__0, &aaqq, & + c_b27, m, &c__1, &work[1], + lda, &ierr); + zdotc_(&z__2, m, &a[p * a_dim1 + 1], & + c__1, &work[1], &c__1); + z__1.r = z__2.r / aapp, z__1.i = + z__2.i / aapp; + aapq.r = z__1.r, aapq.i = z__1.i; + } + } + +/* AAPQ = AAPQ * CONJG( CWORK(p) ) * CWORK(q) */ + aapq1 = -z_abs(&aapq); +/* Computing MAX */ + d__1 = mxaapq, d__2 = -aapq1; + mxaapq = f2cmax(d__1,d__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq1) > *tol) { + d__1 = z_abs(&aapq); + z__1.r = aapq.r / d__1, z__1.i = aapq.i / + d__1; + ompq.r = z__1.r, ompq.i = z__1.i; + +/* [RTD] ROTATED = ROTATED + ONE */ + + if (ir1 == 0) { + notrot = 0; + pskipped = 0; + ++iswrot; + } + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (d__1 = aqoap - apoaq, abs( + d__1)) * -.5 / aapq1; + + if (abs(theta) > bigtheta) { + + t = .5 / theta; + cs = 1.; + d_cnjg(&z__2, &ompq); + z__1.r = t * z__2.r, z__1.i = t * + z__2.i; + zrot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, &cs, &z__1); + if (rsvec) { + d_cnjg(&z__2, &ompq); + z__1.r = t * z__2.r, z__1.i = t * z__2.i; + zrot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, &cs, &z__1); + } +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq1 + 1.; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq1; + aapp *= sqrt((f2cmax(d__1,d__2))); +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(t); + mxsinj = f2cmax(d__1,d__2); + + } else { + + + thsign = -d_sign(&c_b27, &aapq1); + t = 1. / (theta + thsign * sqrt( + theta * theta + 1.)); + cs = sqrt(1. / (t * t + 1.)); + sn = t * cs; + +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(sn); + mxsinj = f2cmax(d__1,d__2); +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq1 + 1.; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq1; + aapp *= sqrt((f2cmax(d__1,d__2))); + + d_cnjg(&z__2, &ompq); + z__1.r = sn * z__2.r, z__1.i = sn + * z__2.i; + zrot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, &cs, &z__1); + if (rsvec) { + d_cnjg(&z__2, &ompq); + z__1.r = sn * z__2.r, z__1.i = sn * z__2.i; + zrot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, &cs, &z__1); + } + } + i__6 = p; + i__7 = q; + z__2.r = -d__[i__7].r, z__2.i = -d__[ + i__7].i; + z__1.r = z__2.r * ompq.r - z__2.i * + ompq.i, z__1.i = z__2.r * + ompq.i + z__2.i * ompq.r; + d__[i__6].r = z__1.r, d__[i__6].i = + z__1.i; + + } else { + zcopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[1], &c__1); + zlascl_("G", &c__0, &c__0, &aapp, & + c_b27, m, &c__1, &work[1], + lda, &ierr); + zlascl_("G", &c__0, &c__0, &aaqq, & + c_b27, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + z__1.r = -aapq.r, z__1.i = -aapq.i; + zaxpy_(m, &z__1, &work[1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + zlascl_("G", &c__0, &c__0, &c_b27, & + aaqq, m, &c__1, &a[q * a_dim1 + + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq1 * aapq1; + sva[q] = aaqq * sqrt((f2cmax(d__1,d__2))) + ; + mxsinj = f2cmax(mxsinj,*sfmin); + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q), SVA(p) */ +/* recompute SVA(q), SVA(p). */ + +/* Computing 2nd power */ + d__1 = sva[q] / aaqq; + if (d__1 * d__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = dznrm2_(m, &a[q * a_dim1 + + 1], &c__1); + } else { + t = 0.; + aaqq = 1.; + zlassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq); + } + } + if (aapp / aapp0 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = dznrm2_(m, &a[p * a_dim1 + + 1], &c__1); + } else { + t = 0.; + aapp = 1.; + zlassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp); + } + sva[p] = aapp; + } + + } else { +/* A(:,p) and A(:,q) already numerically orthogonal */ + if (ir1 == 0) { + ++notrot; + } +/* [RTD] SKIPPED = SKIPPED + 1 */ + ++pskipped; + } + } else { +/* A(:,q) is zero column */ + if (ir1 == 0) { + ++notrot; + } + ++pskipped; + } + + if (i__ <= swband && pskipped > rowskip) { + if (ir1 == 0) { + aapp = -aapp; + } + notrot = 0; + goto L2103; + } + +/* L2002: */ + } +/* END q-LOOP */ + +L2103: +/* bailed out of q-loop */ + + sva[p] = aapp; + + } else { + sva[p] = aapp; + if (ir1 == 0 && aapp == 0.) { +/* Computing MIN */ + i__5 = igl + kbl - 1; + notrot = notrot + f2cmin(i__5,*n) - p; + } + } + +/* L2001: */ + } +/* end of the p-loop */ +/* end of doing the block ( ibr, ibr ) */ +/* L1002: */ + } +/* end of ir1-loop */ + +/* ... go to the off diagonal blocks */ + + igl = (ibr - 1) * kbl + 1; + + i__3 = nbl; + for (jbc = ibr + 1; jbc <= i__3; ++jbc) { + + jgl = (jbc - 1) * kbl + 1; + +/* doing the block at ( ibr, jbc ) */ + + ijblsk = 0; +/* Computing MIN */ + i__5 = igl + kbl - 1; + i__4 = f2cmin(i__5,*n); + for (p = igl; p <= i__4; ++p) { + + aapp = sva[p]; + if (aapp > 0.) { + + pskipped = 0; + +/* Computing MIN */ + i__6 = jgl + kbl - 1; + i__5 = f2cmin(i__6,*n); + for (q = jgl; q <= i__5; ++q) { + + aaqq = sva[q]; + if (aaqq > 0.) { + aapp0 = aapp; + + +/* Safe Gram matrix computation */ + + if (aaqq >= 1.) { + if (aapp >= aaqq) { + rotok = small * aapp <= aaqq; + } else { + rotok = small * aaqq <= aapp; + } + if (aapp < big / aaqq) { + zdotc_(&z__3, m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + z__2.r = z__3.r / aaqq, z__2.i = + z__3.i / aaqq; + z__1.r = z__2.r / aapp, z__1.i = + z__2.i / aapp; + aapq.r = z__1.r, aapq.i = z__1.i; + } else { + zcopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[1], &c__1); + zlascl_("G", &c__0, &c__0, &aapp, & + c_b27, m, &c__1, &work[1], + lda, &ierr); + zdotc_(&z__2, m, &work[1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + z__1.r = z__2.r / aaqq, z__1.i = + z__2.i / aaqq; + aapq.r = z__1.r, aapq.i = z__1.i; + } + } else { + if (aapp >= aaqq) { + rotok = aapp <= aaqq / small; + } else { + rotok = aaqq <= aapp / small; + } + if (aapp > small / aaqq) { + zdotc_(&z__3, m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + d__1 = f2cmax(aaqq,aapp); + z__2.r = z__3.r / d__1, z__2.i = + z__3.i / d__1; + d__2 = f2cmin(aaqq,aapp); + z__1.r = z__2.r / d__2, z__1.i = + z__2.i / d__2; + aapq.r = z__1.r, aapq.i = z__1.i; + } else { + zcopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[1], &c__1); + zlascl_("G", &c__0, &c__0, &aaqq, & + c_b27, m, &c__1, &work[1], + lda, &ierr); + zdotc_(&z__2, m, &a[p * a_dim1 + 1], & + c__1, &work[1], &c__1); + z__1.r = z__2.r / aapp, z__1.i = + z__2.i / aapp; + aapq.r = z__1.r, aapq.i = z__1.i; + } + } + +/* AAPQ = AAPQ * CONJG(CWORK(p))*CWORK(q) */ + aapq1 = -z_abs(&aapq); +/* Computing MAX */ + d__1 = mxaapq, d__2 = -aapq1; + mxaapq = f2cmax(d__1,d__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq1) > *tol) { + d__1 = z_abs(&aapq); + z__1.r = aapq.r / d__1, z__1.i = aapq.i / + d__1; + ompq.r = z__1.r, ompq.i = z__1.i; + notrot = 0; +/* [RTD] ROTATED = ROTATED + 1 */ + pskipped = 0; + ++iswrot; + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (d__1 = aqoap - apoaq, abs( + d__1)) * -.5 / aapq1; + if (aaqq > aapp0) { + theta = -theta; + } + + if (abs(theta) > bigtheta) { + t = .5 / theta; + cs = 1.; + d_cnjg(&z__2, &ompq); + z__1.r = t * z__2.r, z__1.i = t * + z__2.i; + zrot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, &cs, &z__1); + if (rsvec) { + d_cnjg(&z__2, &ompq); + z__1.r = t * z__2.r, z__1.i = t * z__2.i; + zrot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, &cs, &z__1); + } +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq1 + 1.; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq1; + aapp *= sqrt((f2cmax(d__1,d__2))); +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(t); + mxsinj = f2cmax(d__1,d__2); + } else { + + + thsign = -d_sign(&c_b27, &aapq1); + if (aaqq > aapp0) { + thsign = -thsign; + } + t = 1. / (theta + thsign * sqrt( + theta * theta + 1.)); + cs = sqrt(1. / (t * t + 1.)); + sn = t * cs; +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(sn); + mxsinj = f2cmax(d__1,d__2); +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq1 + 1.; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq1; + aapp *= sqrt((f2cmax(d__1,d__2))); + + d_cnjg(&z__2, &ompq); + z__1.r = sn * z__2.r, z__1.i = sn + * z__2.i; + zrot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, &cs, &z__1); + if (rsvec) { + d_cnjg(&z__2, &ompq); + z__1.r = sn * z__2.r, z__1.i = sn * z__2.i; + zrot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, &cs, &z__1); + } + } + i__6 = p; + i__7 = q; + z__2.r = -d__[i__7].r, z__2.i = -d__[ + i__7].i; + z__1.r = z__2.r * ompq.r - z__2.i * + ompq.i, z__1.i = z__2.r * + ompq.i + z__2.i * ompq.r; + d__[i__6].r = z__1.r, d__[i__6].i = + z__1.i; + + } else { + if (aapp > aaqq) { + zcopy_(m, &a[p * a_dim1 + 1], & + c__1, &work[1], &c__1); + zlascl_("G", &c__0, &c__0, &aapp, + &c_b27, m, &c__1, &work[1] + , lda, &ierr); + zlascl_("G", &c__0, &c__0, &aaqq, + &c_b27, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + z__1.r = -aapq.r, z__1.i = + -aapq.i; + zaxpy_(m, &z__1, &work[1], &c__1, + &a[q * a_dim1 + 1], &c__1) + ; + zlascl_("G", &c__0, &c__0, &c_b27, + &aaqq, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq1 * + aapq1; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); + mxsinj = f2cmax(mxsinj,*sfmin); + } else { + zcopy_(m, &a[q * a_dim1 + 1], & + c__1, &work[1], &c__1); + zlascl_("G", &c__0, &c__0, &aaqq, + &c_b27, m, &c__1, &work[1] + , lda, &ierr); + zlascl_("G", &c__0, &c__0, &aapp, + &c_b27, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); + d_cnjg(&z__2, &aapq); + z__1.r = -z__2.r, z__1.i = + -z__2.i; + zaxpy_(m, &z__1, &work[1], &c__1, + &a[p * a_dim1 + 1], &c__1) + ; + zlascl_("G", &c__0, &c__0, &c_b27, + &aapp, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq1 * + aapq1; + sva[p] = aapp * sqrt((f2cmax(d__1, + d__2))); + mxsinj = f2cmax(mxsinj,*sfmin); + } + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q), SVA(p) */ +/* Computing 2nd power */ + d__1 = sva[q] / aaqq; + if (d__1 * d__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = dznrm2_(m, &a[q * a_dim1 + + 1], &c__1); + } else { + t = 0.; + aaqq = 1.; + zlassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq); + } + } +/* Computing 2nd power */ + d__1 = aapp / aapp0; + if (d__1 * d__1 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = dznrm2_(m, &a[p * a_dim1 + + 1], &c__1); + } else { + t = 0.; + aapp = 1.; + zlassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp); + } + sva[p] = aapp; + } +/* end of OK rotation */ + } else { + ++notrot; +/* [RTD] SKIPPED = SKIPPED + 1 */ + ++pskipped; + ++ijblsk; + } + } else { + ++notrot; + ++pskipped; + ++ijblsk; + } + + if (i__ <= swband && ijblsk >= blskip) { + sva[p] = aapp; + notrot = 0; + goto L2011; + } + if (i__ <= swband && pskipped > rowskip) { + aapp = -aapp; + notrot = 0; + goto L2203; + } + +/* L2200: */ + } +/* end of the q-loop */ +L2203: + + sva[p] = aapp; + + } else { + + if (aapp == 0.) { +/* Computing MIN */ + i__5 = jgl + kbl - 1; + notrot = notrot + f2cmin(i__5,*n) - jgl + 1; + } + if (aapp < 0.) { + notrot = 0; + } + + } + +/* L2100: */ + } +/* end of the p-loop */ +/* L2010: */ + } +/* end of the jbc-loop */ +L2011: +/* 2011 bailed out of the jbc-loop */ +/* Computing MIN */ + i__4 = igl + kbl - 1; + i__3 = f2cmin(i__4,*n); + for (p = igl; p <= i__3; ++p) { + sva[p] = (d__1 = sva[p], abs(d__1)); +/* L2012: */ + } +/* ** */ +/* L2000: */ + } +/* 2000 :: end of the ibr-loop */ + + if (sva[*n] < rootbig && sva[*n] > rootsfmin) { + sva[*n] = dznrm2_(m, &a[*n * a_dim1 + 1], &c__1); + } else { + t = 0.; + aapp = 1.; + zlassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp); + sva[*n] = t * sqrt(aapp); + } + +/* Additional steering devices */ + + if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) { + swband = i__; + } + + if (i__ > swband + 1 && mxaapq < sqrt((doublereal) (*n)) * *tol && ( + doublereal) (*n) * mxaapq * mxsinj < *tol) { + goto L1994; + } + + if (notrot >= emptsw) { + goto L1994; + } + +/* L1993: */ + } +/* end i=1:NSWEEP loop */ + +/* #:( Reaching this point means that the procedure has not converged. */ + *info = *nsweep - 1; + goto L1995; + +L1994: +/* #:) Reaching this point means numerical convergence after the i-th */ +/* sweep. */ + + *info = 0; +/* #:) INFO = 0 confirms successful iterations. */ +L1995: + +/* Sort the vector SVA() of column norms. */ + i__1 = *n - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + q = idamax_(&i__2, &sva[p], &c__1) + p - 1; + if (p != q) { + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + i__2 = p; + aapq.r = d__[i__2].r, aapq.i = d__[i__2].i; + i__2 = p; + i__3 = q; + d__[i__2].r = d__[i__3].r, d__[i__2].i = d__[i__3].i; + i__2 = q; + d__[i__2].r = aapq.r, d__[i__2].i = aapq.i; + zswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); + if (rsvec) { + zswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } +/* L5991: */ + } + + return 0; +} /* zgsvj0_ */ + diff --git a/lapack-netlib/SRC/zgsvj1.c b/lapack-netlib/SRC/zgsvj1.c new file mode 100644 index 000000000..f2b3de9ef --- /dev/null +++ b/lapack-netlib/SRC/zgsvj1.c @@ -0,0 +1,1228 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGSVJ1 pre-processor for the routine zgesvj, applies Jacobi rotations targeting only particular + pivots. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGSVJ1 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, */ +/* EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) */ + +/* DOUBLE PRECISION EPS, SFMIN, TOL */ +/* INTEGER INFO, LDA, LDV, LWORK, M, MV, N, N1, NSWEEP */ +/* CHARACTER*1 JOBV */ +/* COMPLEX*16 A( LDA, * ), D( N ), V( LDV, * ), WORK( LWORK ) */ +/* DOUBLE PRECISION SVA( N ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGSVJ1 is called from ZGESVJ as a pre-processor and that is its main */ +/* > purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but */ +/* > it targets only particular pivots and it does not check convergence */ +/* > (stopping criterion). Few tunning parameters (marked by [TP]) are */ +/* > available for the implementer. */ +/* > */ +/* > Further Details */ +/* > ~~~~~~~~~~~~~~~ */ +/* > ZGSVJ1 applies few sweeps of Jacobi rotations in the column space of */ +/* > the input M-by-N matrix A. The pivot pairs are taken from the (1,2) */ +/* > off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The */ +/* > block-entries (tiles) of the (1,2) off-diagonal block are marked by the */ +/* > [x]'s in the following scheme: */ +/* > */ +/* > | * * * [x] [x] [x]| */ +/* > | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. */ +/* > | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. */ +/* > |[x] [x] [x] * * * | */ +/* > |[x] [x] [x] * * * | */ +/* > |[x] [x] [x] * * * | */ +/* > */ +/* > In terms of the columns of A, the first N1 columns are rotated 'against' */ +/* > the remaining N-N1 columns, trying to increase the angle between the */ +/* > corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is */ +/* > tiled using quadratic tiles of side KBL. Here, KBL is a tunning parameter. */ +/* > The number of sweeps is given in NSWEEP and the orthogonality threshold */ +/* > is given in TOL. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > Specifies whether the output from this procedure is used */ +/* > to compute the matrix V: */ +/* > = 'V': the product of the Jacobi rotations is accumulated */ +/* > by postmulyiplying the N-by-N array V. */ +/* > (See the description of V.) */ +/* > = 'A': the product of the Jacobi rotations is accumulated */ +/* > by postmulyiplying the MV-by-N array V. */ +/* > (See the descriptions of MV and V.) */ +/* > = 'N': the Jacobi rotations are not accumulated. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. */ +/* > M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N1 */ +/* > \verbatim */ +/* > N1 is INTEGER */ +/* > N1 specifies the 2 x 2 block partition, the first N1 columns are */ +/* > rotated 'against' the remaining N-N1 columns of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, M-by-N matrix A, such that A*diag(D) represents */ +/* > the input matrix. */ +/* > On exit, */ +/* > A_onexit * D_onexit represents the input matrix A*diag(D) */ +/* > post-multiplied by a sequence of Jacobi rotations, where the */ +/* > rotation threshold and the total number of sweeps are given in */ +/* > TOL and NSWEEP, respectively. */ +/* > (See the descriptions of N1, D, TOL and NSWEEP.) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is COMPLEX*16 array, dimension (N) */ +/* > The array D accumulates the scaling factors from the fast scaled */ +/* > Jacobi rotations. */ +/* > On entry, A*diag(D) represents the input matrix. */ +/* > On exit, A_onexit*diag(D_onexit) represents the input matrix */ +/* > post-multiplied by a sequence of Jacobi rotations, where the */ +/* > rotation threshold and the total number of sweeps are given in */ +/* > TOL and NSWEEP, respectively. */ +/* > (See the descriptions of N1, A, TOL and NSWEEP.) */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] SVA */ +/* > \verbatim */ +/* > SVA is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, SVA contains the Euclidean norms of the columns of */ +/* > the matrix A*diag(D). */ +/* > On exit, SVA contains the Euclidean norms of the columns of */ +/* > the matrix onexit*diag(D_onexit). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MV */ +/* > \verbatim */ +/* > MV is INTEGER */ +/* > If JOBV = 'A', then MV rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'N', then MV is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] V */ +/* > \verbatim */ +/* > V is COMPLEX*16 array, dimension (LDV,N) */ +/* > If JOBV = 'V' then N rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'A' then MV rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'N', then V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V, LDV >= 1. */ +/* > If JOBV = 'V', LDV >= N. */ +/* > If JOBV = 'A', LDV >= MV. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] EPS */ +/* > \verbatim */ +/* > EPS is DOUBLE PRECISION */ +/* > EPS = DLAMCH('Epsilon') */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SFMIN */ +/* > \verbatim */ +/* > SFMIN is DOUBLE PRECISION */ +/* > SFMIN = DLAMCH('Safe Minimum') */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TOL */ +/* > \verbatim */ +/* > TOL is DOUBLE PRECISION */ +/* > TOL is the threshold for Jacobi rotations. For a pair */ +/* > A(:,p), A(:,q) of pivot columns, the Jacobi rotation is */ +/* > applied only if ABS(COS(angle(A(:,p),A(:,q)))) > TOL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NSWEEP */ +/* > \verbatim */ +/* > NSWEEP is INTEGER */ +/* > NSWEEP is the number of sweeps of Jacobi rotations to be */ +/* > performed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > LWORK is the dimension of WORK. LWORK >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, then the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16OTHERcomputational */ + +/* > \par Contributor: */ +/* ================== */ +/* > */ +/* > Zlatko Drmac (Zagreb, Croatia) */ + +/* ===================================================================== */ +/* Subroutine */ int zgsvj1_(char *jobv, integer *m, integer *n, integer *n1, + doublecomplex *a, integer *lda, doublecomplex *d__, doublereal *sva, + integer *mv, doublecomplex *v, integer *ldv, doublereal *eps, + doublereal *sfmin, doublereal *tol, integer *nsweep, doublecomplex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5, + i__6, i__7; + doublereal d__1, d__2; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + integer nblc; + doublereal aapp; + doublecomplex aapq; + doublereal aaqq; + integer nblr, ierr; + doublereal bigtheta; + doublecomplex ompq; + integer pskipped; + extern /* Subroutine */ int zrot_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *); + doublereal aapp0, aapq1, temp1; + integer i__, p, q; + doublereal t, apoaq, aqoap; + extern logical lsame_(char *, char *); + doublereal theta, small; + logical applv, rsvec; + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + logical rotok; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zswap_(integer *, doublecomplex *, + integer *, doublecomplex *, integer *), zaxpy_(integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *); + doublereal rootsfmin; + extern doublereal dznrm2_(integer *, doublecomplex *, integer *); + doublereal cs, sn; + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer ijblsk, swband, blskip; + doublereal mxaapq; + extern /* Subroutine */ int zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex *, + integer *, integer *); + doublereal thsign, mxsinj; + extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, + doublereal *, doublereal *); + integer emptsw, notrot, iswrot, jbc; + doublereal big; + integer kbl, igl, ibr, jgl, mvl; + doublereal rootbig, rooteps; + integer rowskip; + doublereal roottol; + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --sva; + --d__; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + --work; + + /* Function Body */ + applv = lsame_(jobv, "A"); + rsvec = lsame_(jobv, "V"); + if (! (rsvec || applv || lsame_(jobv, "N"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || *n > *m) { + *info = -3; + } else if (*n1 < 0) { + *info = -4; + } else if (*lda < *m) { + *info = -6; + } else if ((rsvec || applv) && *mv < 0) { + *info = -9; + } else if (rsvec && *ldv < *n || applv && *ldv < *mv) { + *info = -11; + } else if (*tol <= *eps) { + *info = -14; + } else if (*nsweep < 0) { + *info = -15; + } else if (*lwork < *m) { + *info = -17; + } else { + *info = 0; + } + +/* #:( */ + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGSVJ1", &i__1, (ftnlen)6); + return 0; + } + + if (rsvec) { + mvl = *n; + } else if (applv) { + mvl = *mv; + } + rsvec = rsvec || applv; + rooteps = sqrt(*eps); + rootsfmin = sqrt(*sfmin); + small = *sfmin / *eps; + big = 1. / *sfmin; + rootbig = 1. / rootsfmin; +/* LARGE = BIG / SQRT( DBLE( M*N ) ) */ + bigtheta = 1. / rooteps; + roottol = sqrt(*tol); + + +/* RSVEC = LSAME( JOBV, 'Y' ) */ + + emptsw = *n1 * (*n - *n1); + notrot = 0; + + + kbl = f2cmin(8,*n); + nblr = *n1 / kbl; + if (nblr * kbl != *n1) { + ++nblr; + } + nblc = (*n - *n1) / kbl; + if (nblc * kbl != *n - *n1) { + ++nblc; + } +/* Computing 2nd power */ + i__1 = kbl; + blskip = i__1 * i__1 + 1; +/* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */ + rowskip = f2cmin(5,kbl); +/* [TP] ROWSKIP is a tuning parameter. */ + swband = 0; +/* [TP] SWBAND is a tuning parameter. It is meaningful and effective */ +/* if ZGESVJ is used as a computational routine in the preconditioned */ +/* Jacobi SVD algorithm ZGEJSV. */ + + +/* | * * * [x] [x] [x]| */ +/* | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. */ +/* | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. */ +/* |[x] [x] [x] * * * | */ +/* |[x] [x] [x] * * * | */ +/* |[x] [x] [x] * * * | */ + + + i__1 = *nsweep; + for (i__ = 1; i__ <= i__1; ++i__) { + + + mxaapq = 0.; + mxsinj = 0.; + iswrot = 0; + + notrot = 0; + pskipped = 0; + +/* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs */ +/* 1 <= p < q <= N. This is the first step toward a blocked implementation */ +/* of the rotations. New implementation, based on block transformations, */ +/* is under development. */ + + i__2 = nblr; + for (ibr = 1; ibr <= i__2; ++ibr) { + + igl = (ibr - 1) * kbl + 1; + + +/* ... go to the off diagonal blocks */ + + igl = (ibr - 1) * kbl + 1; + +/* DO 2010 jbc = ibr + 1, NBL */ + i__3 = nblc; + for (jbc = 1; jbc <= i__3; ++jbc) { + + jgl = (jbc - 1) * kbl + *n1 + 1; + +/* doing the block at ( ibr, jbc ) */ + + ijblsk = 0; +/* Computing MIN */ + i__5 = igl + kbl - 1; + i__4 = f2cmin(i__5,*n1); + for (p = igl; p <= i__4; ++p) { + + aapp = sva[p]; + if (aapp > 0.) { + + pskipped = 0; + +/* Computing MIN */ + i__6 = jgl + kbl - 1; + i__5 = f2cmin(i__6,*n); + for (q = jgl; q <= i__5; ++q) { + + aaqq = sva[q]; + if (aaqq > 0.) { + aapp0 = aapp; + + +/* Safe Gram matrix computation */ + + if (aaqq >= 1.) { + if (aapp >= aaqq) { + rotok = small * aapp <= aaqq; + } else { + rotok = small * aaqq <= aapp; + } + if (aapp < big / aaqq) { + zdotc_(&z__3, m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + z__2.r = z__3.r / aaqq, z__2.i = + z__3.i / aaqq; + z__1.r = z__2.r / aapp, z__1.i = + z__2.i / aapp; + aapq.r = z__1.r, aapq.i = z__1.i; + } else { + zcopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[1], &c__1); + zlascl_("G", &c__0, &c__0, &aapp, & + c_b18, m, &c__1, &work[1], + lda, &ierr); + zdotc_(&z__2, m, &work[1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + z__1.r = z__2.r / aaqq, z__1.i = + z__2.i / aaqq; + aapq.r = z__1.r, aapq.i = z__1.i; + } + } else { + if (aapp >= aaqq) { + rotok = aapp <= aaqq / small; + } else { + rotok = aaqq <= aapp / small; + } + if (aapp > small / aaqq) { + zdotc_(&z__3, m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + d__1 = f2cmax(aaqq,aapp); + z__2.r = z__3.r / d__1, z__2.i = + z__3.i / d__1; + d__2 = f2cmin(aaqq,aapp); + z__1.r = z__2.r / d__2, z__1.i = + z__2.i / d__2; + aapq.r = z__1.r, aapq.i = z__1.i; + } else { + zcopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[1], &c__1); + zlascl_("G", &c__0, &c__0, &aaqq, & + c_b18, m, &c__1, &work[1], + lda, &ierr); + zdotc_(&z__2, m, &a[p * a_dim1 + 1], & + c__1, &work[1], &c__1); + z__1.r = z__2.r / aapp, z__1.i = + z__2.i / aapp; + aapq.r = z__1.r, aapq.i = z__1.i; + } + } + +/* AAPQ = AAPQ * CONJG(CWORK(p))*CWORK(q) */ + aapq1 = -z_abs(&aapq); +/* Computing MAX */ + d__1 = mxaapq, d__2 = -aapq1; + mxaapq = f2cmax(d__1,d__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq1) > *tol) { + d__1 = z_abs(&aapq); + z__1.r = aapq.r / d__1, z__1.i = aapq.i / + d__1; + ompq.r = z__1.r, ompq.i = z__1.i; + notrot = 0; +/* [RTD] ROTATED = ROTATED + 1 */ + pskipped = 0; + ++iswrot; + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (d__1 = aqoap - apoaq, abs( + d__1)) * -.5 / aapq1; + if (aaqq > aapp0) { + theta = -theta; + } + + if (abs(theta) > bigtheta) { + t = .5 / theta; + cs = 1.; + d_cnjg(&z__2, &ompq); + z__1.r = t * z__2.r, z__1.i = t * + z__2.i; + zrot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, &cs, &z__1); + if (rsvec) { + d_cnjg(&z__2, &ompq); + z__1.r = t * z__2.r, z__1.i = t * z__2.i; + zrot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, &cs, &z__1); + } +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq1 + 1.; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq1; + aapp *= sqrt((f2cmax(d__1,d__2))); +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(t); + mxsinj = f2cmax(d__1,d__2); + } else { + + + thsign = -d_sign(&c_b18, &aapq1); + if (aaqq > aapp0) { + thsign = -thsign; + } + t = 1. / (theta + thsign * sqrt( + theta * theta + 1.)); + cs = sqrt(1. / (t * t + 1.)); + sn = t * cs; +/* Computing MAX */ + d__1 = mxsinj, d__2 = abs(sn); + mxsinj = f2cmax(d__1,d__2); +/* Computing MAX */ + d__1 = 0., d__2 = t * apoaq * + aapq1 + 1.; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - t * aqoap * + aapq1; + aapp *= sqrt((f2cmax(d__1,d__2))); + + d_cnjg(&z__2, &ompq); + z__1.r = sn * z__2.r, z__1.i = sn + * z__2.i; + zrot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, &cs, &z__1); + if (rsvec) { + d_cnjg(&z__2, &ompq); + z__1.r = sn * z__2.r, z__1.i = sn * z__2.i; + zrot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, &cs, &z__1); + } + } + i__6 = p; + i__7 = q; + z__2.r = -d__[i__7].r, z__2.i = -d__[ + i__7].i; + z__1.r = z__2.r * ompq.r - z__2.i * + ompq.i, z__1.i = z__2.r * + ompq.i + z__2.i * ompq.r; + d__[i__6].r = z__1.r, d__[i__6].i = + z__1.i; + + } else { + if (aapp > aaqq) { + zcopy_(m, &a[p * a_dim1 + 1], & + c__1, &work[1], &c__1); + zlascl_("G", &c__0, &c__0, &aapp, + &c_b18, m, &c__1, &work[1] + , lda, &ierr); + zlascl_("G", &c__0, &c__0, &aaqq, + &c_b18, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + z__1.r = -aapq.r, z__1.i = + -aapq.i; + zaxpy_(m, &z__1, &work[1], &c__1, + &a[q * a_dim1 + 1], &c__1) + ; + zlascl_("G", &c__0, &c__0, &c_b18, + &aaqq, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq1 * + aapq1; + sva[q] = aaqq * sqrt((f2cmax(d__1, + d__2))); + mxsinj = f2cmax(mxsinj,*sfmin); + } else { + zcopy_(m, &a[q * a_dim1 + 1], & + c__1, &work[1], &c__1); + zlascl_("G", &c__0, &c__0, &aaqq, + &c_b18, m, &c__1, &work[1] + , lda, &ierr); + zlascl_("G", &c__0, &c__0, &aapp, + &c_b18, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); + d_cnjg(&z__2, &aapq); + z__1.r = -z__2.r, z__1.i = + -z__2.i; + zaxpy_(m, &z__1, &work[1], &c__1, + &a[p * a_dim1 + 1], &c__1) + ; + zlascl_("G", &c__0, &c__0, &c_b18, + &aapp, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + d__1 = 0., d__2 = 1. - aapq1 * + aapq1; + sva[p] = aapp * sqrt((f2cmax(d__1, + d__2))); + mxsinj = f2cmax(mxsinj,*sfmin); + } + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q), SVA(p) */ +/* Computing 2nd power */ + d__1 = sva[q] / aaqq; + if (d__1 * d__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = dznrm2_(m, &a[q * a_dim1 + + 1], &c__1); + } else { + t = 0.; + aaqq = 1.; + zlassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq); + } + } +/* Computing 2nd power */ + d__1 = aapp / aapp0; + if (d__1 * d__1 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = dznrm2_(m, &a[p * a_dim1 + + 1], &c__1); + } else { + t = 0.; + aapp = 1.; + zlassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp); + } + sva[p] = aapp; + } +/* end of OK rotation */ + } else { + ++notrot; +/* [RTD] SKIPPED = SKIPPED + 1 */ + ++pskipped; + ++ijblsk; + } + } else { + ++notrot; + ++pskipped; + ++ijblsk; + } + + if (i__ <= swband && ijblsk >= blskip) { + sva[p] = aapp; + notrot = 0; + goto L2011; + } + if (i__ <= swband && pskipped > rowskip) { + aapp = -aapp; + notrot = 0; + goto L2203; + } + +/* L2200: */ + } +/* end of the q-loop */ +L2203: + + sva[p] = aapp; + + } else { + + if (aapp == 0.) { +/* Computing MIN */ + i__5 = jgl + kbl - 1; + notrot = notrot + f2cmin(i__5,*n) - jgl + 1; + } + if (aapp < 0.) { + notrot = 0; + } + + } + +/* L2100: */ + } +/* end of the p-loop */ +/* L2010: */ + } +/* end of the jbc-loop */ +L2011: +/* 2011 bailed out of the jbc-loop */ +/* Computing MIN */ + i__4 = igl + kbl - 1; + i__3 = f2cmin(i__4,*n); + for (p = igl; p <= i__3; ++p) { + sva[p] = (d__1 = sva[p], abs(d__1)); +/* L2012: */ + } +/* ** */ +/* L2000: */ + } +/* 2000 :: end of the ibr-loop */ + + if (sva[*n] < rootbig && sva[*n] > rootsfmin) { + sva[*n] = dznrm2_(m, &a[*n * a_dim1 + 1], &c__1); + } else { + t = 0.; + aapp = 1.; + zlassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp); + sva[*n] = t * sqrt(aapp); + } + +/* Additional steering devices */ + + if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) { + swband = i__; + } + + if (i__ > swband + 1 && mxaapq < sqrt((doublereal) (*n)) * *tol && ( + doublereal) (*n) * mxaapq * mxsinj < *tol) { + goto L1994; + } + + if (notrot >= emptsw) { + goto L1994; + } + +/* L1993: */ + } +/* end i=1:NSWEEP loop */ + +/* #:( Reaching this point means that the procedure has not converged. */ + *info = *nsweep - 1; + goto L1995; + +L1994: +/* #:) Reaching this point means numerical convergence after the i-th */ +/* sweep. */ + + *info = 0; +/* #:) INFO = 0 confirms successful iterations. */ +L1995: + +/* Sort the vector SVA() of column norms. */ + i__1 = *n - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + q = idamax_(&i__2, &sva[p], &c__1) + p - 1; + if (p != q) { + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + i__2 = p; + aapq.r = d__[i__2].r, aapq.i = d__[i__2].i; + i__2 = p; + i__3 = q; + d__[i__2].r = d__[i__3].r, d__[i__2].i = d__[i__3].i; + i__2 = q; + d__[i__2].r = aapq.r, d__[i__2].i = aapq.i; + zswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); + if (rsvec) { + zswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } +/* L5991: */ + } + + + return 0; +} /* zgsvj1_ */ + diff --git a/lapack-netlib/SRC/zgtcon.c b/lapack-netlib/SRC/zgtcon.c new file mode 100644 index 000000000..53b24ac00 --- /dev/null +++ b/lapack-netlib/SRC/zgtcon.c @@ -0,0 +1,647 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGTCON */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGTCON + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, */ +/* WORK, INFO ) */ + +/* CHARACTER NORM */ +/* INTEGER INFO, N */ +/* DOUBLE PRECISION ANORM, RCOND */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGTCON estimates the reciprocal of the condition number of a complex */ +/* > tridiagonal matrix A using the LU factorization as computed by */ +/* > ZGTTRF. */ +/* > */ +/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] NORM */ +/* > \verbatim */ +/* > NORM is CHARACTER*1 */ +/* > Specifies whether the 1-norm condition number or the */ +/* > infinity-norm condition number is required: */ +/* > = '1' or 'O': 1-norm; */ +/* > = 'I': Infinity-norm. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DL */ +/* > \verbatim */ +/* > DL is COMPLEX*16 array, dimension (N-1) */ +/* > The (n-1) multipliers that define the matrix L from the */ +/* > LU factorization of A as computed by ZGTTRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is COMPLEX*16 array, dimension (N) */ +/* > The n diagonal elements of the upper triangular matrix U from */ +/* > the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU */ +/* > \verbatim */ +/* > DU is COMPLEX*16 array, dimension (N-1) */ +/* > The (n-1) elements of the first superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU2 */ +/* > \verbatim */ +/* > DU2 is COMPLEX*16 array, dimension (N-2) */ +/* > The (n-2) elements of the second superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* > interchanged with row IPIV(i). IPIV(i) will always be either */ +/* > i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* > required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ANORM */ +/* > \verbatim */ +/* > ANORM is DOUBLE PRECISION */ +/* > If NORM = '1' or 'O', the 1-norm of the original matrix A. */ +/* > If NORM = 'I', the infinity-norm of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > The reciprocal of the condition number of the matrix A, */ +/* > computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ +/* > estimate of the 1-norm of inv(A) computed in this routine. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GTcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zgtcon_(char *norm, integer *n, doublecomplex *dl, + doublecomplex *d__, doublecomplex *du, doublecomplex *du2, integer * + ipiv, doublereal *anorm, doublereal *rcond, doublecomplex *work, + integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer kase, kase1, i__; + extern logical lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int zlacn2_(integer *, doublecomplex *, + doublecomplex *, doublereal *, integer *, integer *), xerbla_( + char *, integer *, ftnlen); + doublereal ainvnm; + logical onenrm; + extern /* Subroutine */ int zgttrs_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * + , integer *, doublecomplex *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + --work; + --ipiv; + --du2; + --du; + --d__; + --dl; + + /* Function Body */ + *info = 0; + onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O"); + if (! onenrm && ! lsame_(norm, "I")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*anorm < 0.) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGTCON", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.; + if (*n == 0) { + *rcond = 1.; + return 0; + } else if (*anorm == 0.) { + return 0; + } + +/* Check that D(1:N) is non-zero. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + if (d__[i__2].r == 0. && d__[i__2].i == 0.) { + return 0; + } +/* L10: */ + } + + ainvnm = 0.; + if (onenrm) { + kase1 = 1; + } else { + kase1 = 2; + } + kase = 0; +L20: + zlacn2_(n, &work[*n + 1], &work[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == kase1) { + +/* Multiply by inv(U)*inv(L). */ + + zgttrs_("No transpose", n, &c__1, &dl[1], &d__[1], &du[1], &du2[1] + , &ipiv[1], &work[1], n, info); + } else { + +/* Multiply by inv(L**H)*inv(U**H). */ + + zgttrs_("Conjugate transpose", n, &c__1, &dl[1], &d__[1], &du[1], + &du2[1], &ipiv[1], &work[1], n, info); + } + goto L20; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / ainvnm / *anorm; + } + + return 0; + +/* End of ZGTCON */ + +} /* zgtcon_ */ + diff --git a/lapack-netlib/SRC/zgtrfs.c b/lapack-netlib/SRC/zgtrfs.c new file mode 100644 index 000000000..d6301dd33 --- /dev/null +++ b/lapack-netlib/SRC/zgtrfs.c @@ -0,0 +1,1016 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGTRFS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGTRFS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, */ +/* IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDB, LDX, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) */ +/* COMPLEX*16 B( LDB, * ), D( * ), DF( * ), DL( * ), */ +/* $ DLF( * ), DU( * ), DU2( * ), DUF( * ), */ +/* $ WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGTRFS improves the computed solution to a system of linear */ +/* > equations when the coefficient matrix is tridiagonal, and provides */ +/* > error bounds and backward error estimates for the solution. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DL */ +/* > \verbatim */ +/* > DL is COMPLEX*16 array, dimension (N-1) */ +/* > The (n-1) subdiagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is COMPLEX*16 array, dimension (N) */ +/* > The diagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU */ +/* > \verbatim */ +/* > DU is COMPLEX*16 array, dimension (N-1) */ +/* > The (n-1) superdiagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DLF */ +/* > \verbatim */ +/* > DLF is COMPLEX*16 array, dimension (N-1) */ +/* > The (n-1) multipliers that define the matrix L from the */ +/* > LU factorization of A as computed by ZGTTRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DF */ +/* > \verbatim */ +/* > DF is COMPLEX*16 array, dimension (N) */ +/* > The n diagonal elements of the upper triangular matrix U from */ +/* > the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DUF */ +/* > \verbatim */ +/* > DUF is COMPLEX*16 array, dimension (N-1) */ +/* > The (n-1) elements of the first superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU2 */ +/* > \verbatim */ +/* > DU2 is COMPLEX*16 array, dimension (N-2) */ +/* > The (n-2) elements of the second superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* > interchanged with row IPIV(i). IPIV(i) will always be either */ +/* > i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* > required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > The right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension (LDX,NRHS) */ +/* > On entry, the solution matrix X, as computed by ZGTTRS. */ +/* > On exit, the improved solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > ITMAX is the maximum number of steps of iterative refinement. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GTcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zgtrfs_(char *trans, integer *n, integer *nrhs, + doublecomplex *dl, doublecomplex *d__, doublecomplex *du, + doublecomplex *dlf, doublecomplex *df, doublecomplex *duf, + doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *ferr, doublereal *berr, + doublecomplex *work, doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5, + i__6, i__7, i__8, i__9; + doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8, d__9, d__10, + d__11, d__12, d__13, d__14; + doublecomplex z__1; + + /* Local variables */ + integer kase; + doublereal safe1, safe2; + integer i__, j; + doublereal s; + extern logical lsame_(char *, char *); + integer isave[3], count; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zaxpy_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), zlacn2_( + integer *, doublecomplex *, doublecomplex *, doublereal *, + integer *, integer *); + extern doublereal dlamch_(char *); + integer nz; + doublereal safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zlagtm_( + char *, integer *, integer *, doublereal *, doublecomplex *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *, + doublereal *, doublecomplex *, integer *); + logical notran; + char transn[1], transt[1]; + doublereal lstres; + extern /* Subroutine */ int zgttrs_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * + , integer *, doublecomplex *, integer *, integer *); + doublereal eps; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --dl; + --d__; + --du; + --dlf; + --df; + --duf; + --du2; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --rwork; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*ldb < f2cmax(1,*n)) { + *info = -13; + } else if (*ldx < f2cmax(1,*n)) { + *info = -15; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGTRFS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + + if (notran) { + *(unsigned char *)transn = 'N'; + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transn = 'C'; + *(unsigned char *)transt = 'N'; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = 4; + eps = dlamch_("Epsilon"); + safmin = dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.; +L20: + +/* Loop until stopping criterion is satisfied. */ + +/* Compute residual R = B - op(A) * X, */ +/* where op(A) = A, A**T, or A**H, depending on TRANS. */ + + zcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[1], &c__1); + zlagtm_(trans, n, &c__1, &c_b18, &dl[1], &d__[1], &du[1], &x[j * + x_dim1 + 1], ldx, &c_b19, &work[1], n); + +/* Compute abs(op(A))*abs(x) + abs(b) for use in the backward */ +/* error bound. */ + + if (notran) { + if (*n == 1) { + i__2 = j * b_dim1 + 1; + i__3 = j * x_dim1 + 1; + rwork[1] = (d__1 = b[i__2].r, abs(d__1)) + (d__2 = d_imag(&b[ + j * b_dim1 + 1]), abs(d__2)) + ((d__3 = d__[1].r, abs( + d__3)) + (d__4 = d_imag(&d__[1]), abs(d__4))) * (( + d__5 = x[i__3].r, abs(d__5)) + (d__6 = d_imag(&x[j * + x_dim1 + 1]), abs(d__6))); + } else { + i__2 = j * b_dim1 + 1; + i__3 = j * x_dim1 + 1; + i__4 = j * x_dim1 + 2; + rwork[1] = (d__1 = b[i__2].r, abs(d__1)) + (d__2 = d_imag(&b[ + j * b_dim1 + 1]), abs(d__2)) + ((d__3 = d__[1].r, abs( + d__3)) + (d__4 = d_imag(&d__[1]), abs(d__4))) * (( + d__5 = x[i__3].r, abs(d__5)) + (d__6 = d_imag(&x[j * + x_dim1 + 1]), abs(d__6))) + ((d__7 = du[1].r, abs( + d__7)) + (d__8 = d_imag(&du[1]), abs(d__8))) * ((d__9 + = x[i__4].r, abs(d__9)) + (d__10 = d_imag(&x[j * + x_dim1 + 2]), abs(d__10))); + i__2 = *n - 1; + for (i__ = 2; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ - 1; + i__5 = i__ - 1 + j * x_dim1; + i__6 = i__; + i__7 = i__ + j * x_dim1; + i__8 = i__; + i__9 = i__ + 1 + j * x_dim1; + rwork[i__] = (d__1 = b[i__3].r, abs(d__1)) + (d__2 = + d_imag(&b[i__ + j * b_dim1]), abs(d__2)) + ((d__3 + = dl[i__4].r, abs(d__3)) + (d__4 = d_imag(&dl[i__ + - 1]), abs(d__4))) * ((d__5 = x[i__5].r, abs(d__5) + ) + (d__6 = d_imag(&x[i__ - 1 + j * x_dim1]), abs( + d__6))) + ((d__7 = d__[i__6].r, abs(d__7)) + ( + d__8 = d_imag(&d__[i__]), abs(d__8))) * ((d__9 = + x[i__7].r, abs(d__9)) + (d__10 = d_imag(&x[i__ + + j * x_dim1]), abs(d__10))) + ((d__11 = du[i__8].r, + abs(d__11)) + (d__12 = d_imag(&du[i__]), abs( + d__12))) * ((d__13 = x[i__9].r, abs(d__13)) + ( + d__14 = d_imag(&x[i__ + 1 + j * x_dim1]), abs( + d__14))); +/* L30: */ + } + i__2 = *n + j * b_dim1; + i__3 = *n - 1; + i__4 = *n - 1 + j * x_dim1; + i__5 = *n; + i__6 = *n + j * x_dim1; + rwork[*n] = (d__1 = b[i__2].r, abs(d__1)) + (d__2 = d_imag(&b[ + *n + j * b_dim1]), abs(d__2)) + ((d__3 = dl[i__3].r, + abs(d__3)) + (d__4 = d_imag(&dl[*n - 1]), abs(d__4))) + * ((d__5 = x[i__4].r, abs(d__5)) + (d__6 = d_imag(&x[* + n - 1 + j * x_dim1]), abs(d__6))) + ((d__7 = d__[i__5] + .r, abs(d__7)) + (d__8 = d_imag(&d__[*n]), abs(d__8))) + * ((d__9 = x[i__6].r, abs(d__9)) + (d__10 = d_imag(& + x[*n + j * x_dim1]), abs(d__10))); + } + } else { + if (*n == 1) { + i__2 = j * b_dim1 + 1; + i__3 = j * x_dim1 + 1; + rwork[1] = (d__1 = b[i__2].r, abs(d__1)) + (d__2 = d_imag(&b[ + j * b_dim1 + 1]), abs(d__2)) + ((d__3 = d__[1].r, abs( + d__3)) + (d__4 = d_imag(&d__[1]), abs(d__4))) * (( + d__5 = x[i__3].r, abs(d__5)) + (d__6 = d_imag(&x[j * + x_dim1 + 1]), abs(d__6))); + } else { + i__2 = j * b_dim1 + 1; + i__3 = j * x_dim1 + 1; + i__4 = j * x_dim1 + 2; + rwork[1] = (d__1 = b[i__2].r, abs(d__1)) + (d__2 = d_imag(&b[ + j * b_dim1 + 1]), abs(d__2)) + ((d__3 = d__[1].r, abs( + d__3)) + (d__4 = d_imag(&d__[1]), abs(d__4))) * (( + d__5 = x[i__3].r, abs(d__5)) + (d__6 = d_imag(&x[j * + x_dim1 + 1]), abs(d__6))) + ((d__7 = dl[1].r, abs( + d__7)) + (d__8 = d_imag(&dl[1]), abs(d__8))) * ((d__9 + = x[i__4].r, abs(d__9)) + (d__10 = d_imag(&x[j * + x_dim1 + 2]), abs(d__10))); + i__2 = *n - 1; + for (i__ = 2; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ - 1; + i__5 = i__ - 1 + j * x_dim1; + i__6 = i__; + i__7 = i__ + j * x_dim1; + i__8 = i__; + i__9 = i__ + 1 + j * x_dim1; + rwork[i__] = (d__1 = b[i__3].r, abs(d__1)) + (d__2 = + d_imag(&b[i__ + j * b_dim1]), abs(d__2)) + ((d__3 + = du[i__4].r, abs(d__3)) + (d__4 = d_imag(&du[i__ + - 1]), abs(d__4))) * ((d__5 = x[i__5].r, abs(d__5) + ) + (d__6 = d_imag(&x[i__ - 1 + j * x_dim1]), abs( + d__6))) + ((d__7 = d__[i__6].r, abs(d__7)) + ( + d__8 = d_imag(&d__[i__]), abs(d__8))) * ((d__9 = + x[i__7].r, abs(d__9)) + (d__10 = d_imag(&x[i__ + + j * x_dim1]), abs(d__10))) + ((d__11 = dl[i__8].r, + abs(d__11)) + (d__12 = d_imag(&dl[i__]), abs( + d__12))) * ((d__13 = x[i__9].r, abs(d__13)) + ( + d__14 = d_imag(&x[i__ + 1 + j * x_dim1]), abs( + d__14))); +/* L40: */ + } + i__2 = *n + j * b_dim1; + i__3 = *n - 1; + i__4 = *n - 1 + j * x_dim1; + i__5 = *n; + i__6 = *n + j * x_dim1; + rwork[*n] = (d__1 = b[i__2].r, abs(d__1)) + (d__2 = d_imag(&b[ + *n + j * b_dim1]), abs(d__2)) + ((d__3 = du[i__3].r, + abs(d__3)) + (d__4 = d_imag(&du[*n - 1]), abs(d__4))) + * ((d__5 = x[i__4].r, abs(d__5)) + (d__6 = d_imag(&x[* + n - 1 + j * x_dim1]), abs(d__6))) + ((d__7 = d__[i__5] + .r, abs(d__7)) + (d__8 = d_imag(&d__[*n]), abs(d__8))) + * ((d__9 = x[i__6].r, abs(d__9)) + (d__10 = d_imag(& + x[*n + j * x_dim1]), abs(d__10))); + } + } + +/* Compute componentwise relative backward error from formula */ + +/* f2cmax(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (rwork[i__] > safe2) { +/* Computing MAX */ + i__3 = i__; + d__3 = s, d__4 = ((d__1 = work[i__3].r, abs(d__1)) + (d__2 = + d_imag(&work[i__]), abs(d__2))) / rwork[i__]; + s = f2cmax(d__3,d__4); + } else { +/* Computing MAX */ + i__3 = i__; + d__3 = s, d__4 = ((d__1 = work[i__3].r, abs(d__1)) + (d__2 = + d_imag(&work[i__]), abs(d__2)) + safe1) / (rwork[i__] + + safe1); + s = f2cmax(d__3,d__4); + } +/* L50: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried. */ + + if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { + +/* Update solution and try again. */ + + zgttrs_(trans, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[ + 1], &work[1], n, info); + zaxpy_(n, &c_b26, &work[1], &c__1, &x[j * x_dim1 + 1], &c__1); + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(op(A)))* */ +/* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(op(A)) is the inverse of op(A) */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ + +/* Use ZLACN2 to estimate the infinity-norm of the matrix */ +/* inv(op(A)) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (rwork[i__] > safe2) { + i__3 = i__; + rwork[i__] = (d__1 = work[i__3].r, abs(d__1)) + (d__2 = + d_imag(&work[i__]), abs(d__2)) + nz * eps * rwork[i__] + ; + } else { + i__3 = i__; + rwork[i__] = (d__1 = work[i__3].r, abs(d__1)) + (d__2 = + d_imag(&work[i__]), abs(d__2)) + nz * eps * rwork[i__] + + safe1; + } +/* L60: */ + } + + kase = 0; +L70: + zlacn2_(n, &work[*n + 1], &work[1], &ferr[j], &kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(op(A)**H). */ + + zgttrs_(transt, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], & + ipiv[1], &work[1], n, info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__; + z__1.r = rwork[i__4] * work[i__5].r, z__1.i = rwork[i__4] + * work[i__5].i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; +/* L80: */ + } + } else { + +/* Multiply by inv(op(A))*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__; + z__1.r = rwork[i__4] * work[i__5].r, z__1.i = rwork[i__4] + * work[i__5].i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; +/* L90: */ + } + zgttrs_(transn, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], & + ipiv[1], &work[1], n, info); + } + goto L70; + } + +/* Normalize error. */ + + lstres = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + i__3 = i__ + j * x_dim1; + d__3 = lstres, d__4 = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = + d_imag(&x[i__ + j * x_dim1]), abs(d__2)); + lstres = f2cmax(d__3,d__4); +/* L100: */ + } + if (lstres != 0.) { + ferr[j] /= lstres; + } + +/* L110: */ + } + + return 0; + +/* End of ZGTRFS */ + +} /* zgtrfs_ */ + diff --git a/lapack-netlib/SRC/zgtsv.c b/lapack-netlib/SRC/zgtsv.c new file mode 100644 index 000000000..aaaa9bd44 --- /dev/null +++ b/lapack-netlib/SRC/zgtsv.c @@ -0,0 +1,711 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGTSV computes the solution to system of linear equations A * X = B for GT matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGTSV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) */ + +/* INTEGER INFO, LDB, N, NRHS */ +/* COMPLEX*16 B( LDB, * ), D( * ), DL( * ), DU( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGTSV solves the equation */ +/* > */ +/* > A*X = B, */ +/* > */ +/* > where A is an N-by-N tridiagonal matrix, by Gaussian elimination with */ +/* > partial pivoting. */ +/* > */ +/* > Note that the equation A**T *X = B may be solved by interchanging the */ +/* > order of the arguments DU and DL. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DL */ +/* > \verbatim */ +/* > DL is COMPLEX*16 array, dimension (N-1) */ +/* > On entry, DL must contain the (n-1) subdiagonal elements of */ +/* > A. */ +/* > On exit, DL is overwritten by the (n-2) elements of the */ +/* > second superdiagonal of the upper triangular matrix U from */ +/* > the LU factorization of A, in DL(1), ..., DL(n-2). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is COMPLEX*16 array, dimension (N) */ +/* > On entry, D must contain the diagonal elements of A. */ +/* > On exit, D is overwritten by the n diagonal elements of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DU */ +/* > \verbatim */ +/* > DU is COMPLEX*16 array, dimension (N-1) */ +/* > On entry, DU must contain the (n-1) superdiagonal elements */ +/* > of A. */ +/* > On exit, DU is overwritten by the (n-1) elements of the first */ +/* > superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero, and the solution */ +/* > has not been computed. The factorization has not been */ +/* > completed unless i = N. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GTsolve */ + +/* ===================================================================== */ +/* Subroutine */ int zgtsv_(integer *n, integer *nrhs, doublecomplex *dl, + doublecomplex *d__, doublecomplex *du, doublecomplex *b, integer *ldb, + integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3, z__4, z__5; + + /* Local variables */ + doublecomplex temp, mult; + integer j, k; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --dl; + --d__; + --du; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGTSV ", &i__1, (ftnlen)6); + return 0; + } + + if (*n == 0) { + return 0; + } + + i__1 = *n - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k; + if (dl[i__2].r == 0. && dl[i__2].i == 0.) { + +/* Subdiagonal is zero, no elimination is required. */ + + i__2 = k; + if (d__[i__2].r == 0. && d__[i__2].i == 0.) { + +/* Diagonal is zero: set INFO = K and return; a unique */ +/* solution can not be found. */ + + *info = k; + return 0; + } + } else /* if(complicated condition) */ { + i__2 = k; + i__3 = k; + if ((d__1 = d__[i__2].r, abs(d__1)) + (d__2 = d_imag(&d__[k]), + abs(d__2)) >= (d__3 = dl[i__3].r, abs(d__3)) + (d__4 = + d_imag(&dl[k]), abs(d__4))) { + +/* No row interchange required */ + + z_div(&z__1, &dl[k], &d__[k]); + mult.r = z__1.r, mult.i = z__1.i; + i__2 = k + 1; + i__3 = k + 1; + i__4 = k; + z__2.r = mult.r * du[i__4].r - mult.i * du[i__4].i, z__2.i = + mult.r * du[i__4].i + mult.i * du[i__4].r; + z__1.r = d__[i__3].r - z__2.r, z__1.i = d__[i__3].i - z__2.i; + d__[i__2].r = z__1.r, d__[i__2].i = z__1.i; + i__2 = *nrhs; + for (j = 1; j <= i__2; ++j) { + i__3 = k + 1 + j * b_dim1; + i__4 = k + 1 + j * b_dim1; + i__5 = k + j * b_dim1; + z__2.r = mult.r * b[i__5].r - mult.i * b[i__5].i, z__2.i = + mult.r * b[i__5].i + mult.i * b[i__5].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4].i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L10: */ + } + if (k < *n - 1) { + i__2 = k; + dl[i__2].r = 0., dl[i__2].i = 0.; + } + } else { + +/* Interchange rows K and K+1 */ + + z_div(&z__1, &d__[k], &dl[k]); + mult.r = z__1.r, mult.i = z__1.i; + i__2 = k; + i__3 = k; + d__[i__2].r = dl[i__3].r, d__[i__2].i = dl[i__3].i; + i__2 = k + 1; + temp.r = d__[i__2].r, temp.i = d__[i__2].i; + i__2 = k + 1; + i__3 = k; + z__2.r = mult.r * temp.r - mult.i * temp.i, z__2.i = mult.r * + temp.i + mult.i * temp.r; + z__1.r = du[i__3].r - z__2.r, z__1.i = du[i__3].i - z__2.i; + d__[i__2].r = z__1.r, d__[i__2].i = z__1.i; + if (k < *n - 1) { + i__2 = k; + i__3 = k + 1; + dl[i__2].r = du[i__3].r, dl[i__2].i = du[i__3].i; + i__2 = k + 1; + z__2.r = -mult.r, z__2.i = -mult.i; + i__3 = k; + z__1.r = z__2.r * dl[i__3].r - z__2.i * dl[i__3].i, + z__1.i = z__2.r * dl[i__3].i + z__2.i * dl[i__3] + .r; + du[i__2].r = z__1.r, du[i__2].i = z__1.i; + } + i__2 = k; + du[i__2].r = temp.r, du[i__2].i = temp.i; + i__2 = *nrhs; + for (j = 1; j <= i__2; ++j) { + i__3 = k + j * b_dim1; + temp.r = b[i__3].r, temp.i = b[i__3].i; + i__3 = k + j * b_dim1; + i__4 = k + 1 + j * b_dim1; + b[i__3].r = b[i__4].r, b[i__3].i = b[i__4].i; + i__3 = k + 1 + j * b_dim1; + i__4 = k + 1 + j * b_dim1; + z__2.r = mult.r * b[i__4].r - mult.i * b[i__4].i, z__2.i = + mult.r * b[i__4].i + mult.i * b[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L20: */ + } + } + } +/* L30: */ + } + i__1 = *n; + if (d__[i__1].r == 0. && d__[i__1].i == 0.) { + *info = *n; + return 0; + } + +/* Back solve with the matrix U from the factorization. */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n + j * b_dim1; + z_div(&z__1, &b[*n + j * b_dim1], &d__[*n]); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + if (*n > 1) { + i__2 = *n - 1 + j * b_dim1; + i__3 = *n - 1 + j * b_dim1; + i__4 = *n - 1; + i__5 = *n + j * b_dim1; + z__3.r = du[i__4].r * b[i__5].r - du[i__4].i * b[i__5].i, z__3.i = + du[i__4].r * b[i__5].i + du[i__4].i * b[i__5].r; + z__2.r = b[i__3].r - z__3.r, z__2.i = b[i__3].i - z__3.i; + z_div(&z__1, &z__2, &d__[*n - 1]); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + for (k = *n - 2; k >= 1; --k) { + i__2 = k + j * b_dim1; + i__3 = k + j * b_dim1; + i__4 = k; + i__5 = k + 1 + j * b_dim1; + z__4.r = du[i__4].r * b[i__5].r - du[i__4].i * b[i__5].i, z__4.i = + du[i__4].r * b[i__5].i + du[i__4].i * b[i__5].r; + z__3.r = b[i__3].r - z__4.r, z__3.i = b[i__3].i - z__4.i; + i__6 = k; + i__7 = k + 2 + j * b_dim1; + z__5.r = dl[i__6].r * b[i__7].r - dl[i__6].i * b[i__7].i, z__5.i = + dl[i__6].r * b[i__7].i + dl[i__6].i * b[i__7].r; + z__2.r = z__3.r - z__5.r, z__2.i = z__3.i - z__5.i; + z_div(&z__1, &z__2, &d__[k]); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L40: */ + } +/* L50: */ + } + + return 0; + +/* End of ZGTSV */ + +} /* zgtsv_ */ + diff --git a/lapack-netlib/SRC/zgtsvx.c b/lapack-netlib/SRC/zgtsvx.c new file mode 100644 index 000000000..77be4b883 --- /dev/null +++ b/lapack-netlib/SRC/zgtsvx.c @@ -0,0 +1,835 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZGTSVX computes the solution to system of linear equations A * X = B for GT matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGTSVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, */ +/* DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, */ +/* WORK, RWORK, INFO ) */ + +/* CHARACTER FACT, TRANS */ +/* INTEGER INFO, LDB, LDX, N, NRHS */ +/* DOUBLE PRECISION RCOND */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) */ +/* COMPLEX*16 B( LDB, * ), D( * ), DF( * ), DL( * ), */ +/* $ DLF( * ), DU( * ), DU2( * ), DUF( * ), */ +/* $ WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGTSVX uses the LU factorization to compute the solution to a complex */ +/* > system of linear equations A * X = B, A**T * X = B, or A**H * X = B, */ +/* > where A is a tridiagonal matrix of order N and X and B are N-by-NRHS */ +/* > matrices. */ +/* > */ +/* > Error bounds on the solution and a condition estimate are also */ +/* > provided. */ +/* > \endverbatim */ + +/* > \par Description: */ +/* ================= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The following steps are performed: */ +/* > */ +/* > 1. If FACT = 'N', the LU decomposition is used to factor the matrix A */ +/* > as A = L * U, where L is a product of permutation and unit lower */ +/* > bidiagonal matrices and U is upper triangular with nonzeros in */ +/* > only the main diagonal and first two superdiagonals. */ +/* > */ +/* > 2. If some U(i,i)=0, so that U is exactly singular, then the routine */ +/* > returns with INFO = i. Otherwise, the factored form of A is used */ +/* > to estimate the condition number of the matrix A. If the */ +/* > reciprocal of the condition number is less than machine precision, */ +/* > INFO = N+1 is returned as a warning, but the routine still goes on */ +/* > to solve for X and compute error bounds as described below. */ +/* > */ +/* > 3. The system of equations is solved for X using the factored form */ +/* > of A. */ +/* > */ +/* > 4. Iterative refinement is applied to improve the computed solution */ +/* > matrix and calculate error bounds and backward error estimates */ +/* > for it. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] FACT */ +/* > \verbatim */ +/* > FACT is CHARACTER*1 */ +/* > Specifies whether or not the factored form of A has been */ +/* > supplied on entry. */ +/* > = 'F': DLF, DF, DUF, DU2, and IPIV contain the factored form */ +/* > of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV will not */ +/* > be modified. */ +/* > = 'N': The matrix will be copied to DLF, DF, and DUF */ +/* > and factored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DL */ +/* > \verbatim */ +/* > DL is COMPLEX*16 array, dimension (N-1) */ +/* > The (n-1) subdiagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is COMPLEX*16 array, dimension (N) */ +/* > The n diagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU */ +/* > \verbatim */ +/* > DU is COMPLEX*16 array, dimension (N-1) */ +/* > The (n-1) superdiagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DLF */ +/* > \verbatim */ +/* > DLF is COMPLEX*16 array, dimension (N-1) */ +/* > If FACT = 'F', then DLF is an input argument and on entry */ +/* > contains the (n-1) multipliers that define the matrix L from */ +/* > the LU factorization of A as computed by ZGTTRF. */ +/* > */ +/* > If FACT = 'N', then DLF is an output argument and on exit */ +/* > contains the (n-1) multipliers that define the matrix L from */ +/* > the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DF */ +/* > \verbatim */ +/* > DF is COMPLEX*16 array, dimension (N) */ +/* > If FACT = 'F', then DF is an input argument and on entry */ +/* > contains the n diagonal elements of the upper triangular */ +/* > matrix U from the LU factorization of A. */ +/* > */ +/* > If FACT = 'N', then DF is an output argument and on exit */ +/* > contains the n diagonal elements of the upper triangular */ +/* > matrix U from the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DUF */ +/* > \verbatim */ +/* > DUF is COMPLEX*16 array, dimension (N-1) */ +/* > If FACT = 'F', then DUF is an input argument and on entry */ +/* > contains the (n-1) elements of the first superdiagonal of U. */ +/* > */ +/* > If FACT = 'N', then DUF is an output argument and on exit */ +/* > contains the (n-1) elements of the first superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DU2 */ +/* > \verbatim */ +/* > DU2 is COMPLEX*16 array, dimension (N-2) */ +/* > If FACT = 'F', then DU2 is an input argument and on entry */ +/* > contains the (n-2) elements of the second superdiagonal of */ +/* > U. */ +/* > */ +/* > If FACT = 'N', then DU2 is an output argument and on exit */ +/* > contains the (n-2) elements of the second superdiagonal of */ +/* > U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > If FACT = 'F', then IPIV is an input argument and on entry */ +/* > contains the pivot indices from the LU factorization of A as */ +/* > computed by ZGTTRF. */ +/* > */ +/* > If FACT = 'N', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the LU factorization of A; */ +/* > row i of the matrix was interchanged with row IPIV(i). */ +/* > IPIV(i) will always be either i or i+1; IPIV(i) = i indicates */ +/* > a row interchange was not required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > The N-by-NRHS right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension (LDX,NRHS) */ +/* > If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > The estimate of the reciprocal condition number of the matrix */ +/* > A. If RCOND is less than the machine precision (in */ +/* > particular, if RCOND = 0), the matrix is singular to working */ +/* > precision. This condition is indicated by a return code of */ +/* > INFO > 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is */ +/* > <= N: U(i,i) is exactly zero. The factorization */ +/* > has not been completed unless i = N, but the */ +/* > factor U is exactly singular, so the solution */ +/* > and error bounds could not be computed. */ +/* > RCOND = 0 is returned. */ +/* > = N+1: U is nonsingular, but RCOND is less than machine */ +/* > precision, meaning that the matrix is singular */ +/* > to working precision. Nevertheless, the */ +/* > solution and error bounds are computed because */ +/* > there are a number of situations where the */ +/* > computed solution can be more accurate than the */ +/* > value of RCOND would suggest. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GTsolve */ + +/* ===================================================================== */ +/* Subroutine */ int zgtsvx_(char *fact, char *trans, integer *n, integer * + nrhs, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, + doublecomplex *dlf, doublecomplex *df, doublecomplex *duf, + doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, + doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, + doublereal *berr, doublecomplex *work, doublereal *rwork, integer * + info) +{ + /* System generated locals */ + integer b_dim1, b_offset, x_dim1, x_offset, i__1; + + /* Local variables */ + char norm[1]; + extern logical lsame_(char *, char *); + doublereal anorm; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + extern doublereal dlamch_(char *); + logical nofact; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern doublereal zlangt_(char *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *); + logical notran; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zgtcon_(char *, integer *, doublecomplex *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *), zgtrfs_(char *, + integer *, integer *, doublecomplex *, doublecomplex *, + doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * + , doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, doublereal *, integer *), zgttrf_( + integer *, doublecomplex *, doublecomplex *, doublecomplex *, + doublecomplex *, integer *, integer *), zgttrs_(char *, integer *, + integer *, doublecomplex *, doublecomplex *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --dl; + --d__; + --du; + --dlf; + --df; + --duf; + --du2; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --rwork; + + /* Function Body */ + *info = 0; + nofact = lsame_(fact, "N"); + notran = lsame_(trans, "N"); + if (! nofact && ! lsame_(fact, "F")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*ldb < f2cmax(1,*n)) { + *info = -14; + } else if (*ldx < f2cmax(1,*n)) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGTSVX", &i__1, (ftnlen)6); + return 0; + } + + if (nofact) { + +/* Compute the LU factorization of A. */ + + zcopy_(n, &d__[1], &c__1, &df[1], &c__1); + if (*n > 1) { + i__1 = *n - 1; + zcopy_(&i__1, &dl[1], &c__1, &dlf[1], &c__1); + i__1 = *n - 1; + zcopy_(&i__1, &du[1], &c__1, &duf[1], &c__1); + } + zgttrf_(n, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + *rcond = 0.; + return 0; + } + } + +/* Compute the norm of the matrix A. */ + + if (notran) { + *(unsigned char *)norm = '1'; + } else { + *(unsigned char *)norm = 'I'; + } + anorm = zlangt_(norm, n, &dl[1], &d__[1], &du[1]); + +/* Compute the reciprocal of the condition number of A. */ + + zgtcon_(norm, n, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &anorm, + rcond, &work[1], info); + +/* Compute the solution vectors X. */ + + zlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + zgttrs_(trans, n, nrhs, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &x[ + x_offset], ldx, info); + +/* Use iterative refinement to improve the computed solutions and */ +/* compute error bounds and backward error estimates for them. */ + + zgtrfs_(trans, n, nrhs, &dl[1], &d__[1], &du[1], &dlf[1], &df[1], &duf[1], + &du2[1], &ipiv[1], &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1] + , &berr[1], &work[1], &rwork[1], info); + +/* Set INFO = N+1 if the matrix is singular to working precision. */ + + if (*rcond < dlamch_("Epsilon")) { + *info = *n + 1; + } + + return 0; + +/* End of ZGTSVX */ + +} /* zgtsvx_ */ + diff --git a/lapack-netlib/SRC/zgttrf.c b/lapack-netlib/SRC/zgttrf.c new file mode 100644 index 000000000..a9ddab556 --- /dev/null +++ b/lapack-netlib/SRC/zgttrf.c @@ -0,0 +1,696 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGTTRF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGTTRF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) */ + +/* INTEGER INFO, N */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 D( * ), DL( * ), DU( * ), DU2( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGTTRF computes an LU factorization of a complex tridiagonal matrix A */ +/* > using elimination with partial pivoting and row interchanges. */ +/* > */ +/* > The factorization has the form */ +/* > A = L * U */ +/* > where L is a product of permutation and unit lower bidiagonal */ +/* > matrices and U is upper triangular with nonzeros in only the main */ +/* > diagonal and first two superdiagonals. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DL */ +/* > \verbatim */ +/* > DL is COMPLEX*16 array, dimension (N-1) */ +/* > On entry, DL must contain the (n-1) sub-diagonal elements of */ +/* > A. */ +/* > */ +/* > On exit, DL is overwritten by the (n-1) multipliers that */ +/* > define the matrix L from the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is COMPLEX*16 array, dimension (N) */ +/* > On entry, D must contain the diagonal elements of A. */ +/* > */ +/* > On exit, D is overwritten by the n diagonal elements of the */ +/* > upper triangular matrix U from the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DU */ +/* > \verbatim */ +/* > DU is COMPLEX*16 array, dimension (N-1) */ +/* > On entry, DU must contain the (n-1) super-diagonal elements */ +/* > of A. */ +/* > */ +/* > On exit, DU is overwritten by the (n-1) elements of the first */ +/* > super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DU2 */ +/* > \verbatim */ +/* > DU2 is COMPLEX*16 array, dimension (N-2) */ +/* > On exit, DU2 is overwritten by the (n-2) elements of the */ +/* > second super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* > interchanged with row IPIV(i). IPIV(i) will always be either */ +/* > i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* > required. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > > 0: if INFO = k, U(k,k) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GTcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zgttrf_(integer *n, doublecomplex *dl, doublecomplex * + d__, doublecomplex *du, doublecomplex *du2, integer *ipiv, integer * + info) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2; + + /* Local variables */ + doublecomplex fact, temp; + integer i__; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --ipiv; + --du2; + --du; + --d__; + --dl; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + i__1 = -(*info); + xerbla_("ZGTTRF", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Initialize IPIV(i) = i and DU2(i) = 0 */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ipiv[i__] = i__; +/* L10: */ + } + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + du2[i__2].r = 0., du2[i__2].i = 0.; +/* L20: */ + } + + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + if ((d__1 = d__[i__2].r, abs(d__1)) + (d__2 = d_imag(&d__[i__]), abs( + d__2)) >= (d__3 = dl[i__3].r, abs(d__3)) + (d__4 = d_imag(&dl[ + i__]), abs(d__4))) { + +/* No row interchange required, eliminate DL(I) */ + + i__2 = i__; + if ((d__1 = d__[i__2].r, abs(d__1)) + (d__2 = d_imag(&d__[i__]), + abs(d__2)) != 0.) { + z_div(&z__1, &dl[i__], &d__[i__]); + fact.r = z__1.r, fact.i = z__1.i; + i__2 = i__; + dl[i__2].r = fact.r, dl[i__2].i = fact.i; + i__2 = i__ + 1; + i__3 = i__ + 1; + i__4 = i__; + z__2.r = fact.r * du[i__4].r - fact.i * du[i__4].i, z__2.i = + fact.r * du[i__4].i + fact.i * du[i__4].r; + z__1.r = d__[i__3].r - z__2.r, z__1.i = d__[i__3].i - z__2.i; + d__[i__2].r = z__1.r, d__[i__2].i = z__1.i; + } + } else { + +/* Interchange rows I and I+1, eliminate DL(I) */ + + z_div(&z__1, &d__[i__], &dl[i__]); + fact.r = z__1.r, fact.i = z__1.i; + i__2 = i__; + i__3 = i__; + d__[i__2].r = dl[i__3].r, d__[i__2].i = dl[i__3].i; + i__2 = i__; + dl[i__2].r = fact.r, dl[i__2].i = fact.i; + i__2 = i__; + temp.r = du[i__2].r, temp.i = du[i__2].i; + i__2 = i__; + i__3 = i__ + 1; + du[i__2].r = d__[i__3].r, du[i__2].i = d__[i__3].i; + i__2 = i__ + 1; + i__3 = i__ + 1; + z__2.r = fact.r * d__[i__3].r - fact.i * d__[i__3].i, z__2.i = + fact.r * d__[i__3].i + fact.i * d__[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + d__[i__2].r = z__1.r, d__[i__2].i = z__1.i; + i__2 = i__; + i__3 = i__ + 1; + du2[i__2].r = du[i__3].r, du2[i__2].i = du[i__3].i; + i__2 = i__ + 1; + z__2.r = -fact.r, z__2.i = -fact.i; + i__3 = i__ + 1; + z__1.r = z__2.r * du[i__3].r - z__2.i * du[i__3].i, z__1.i = + z__2.r * du[i__3].i + z__2.i * du[i__3].r; + du[i__2].r = z__1.r, du[i__2].i = z__1.i; + ipiv[i__] = i__ + 1; + } +/* L30: */ + } + if (*n > 1) { + i__ = *n - 1; + i__1 = i__; + i__2 = i__; + if ((d__1 = d__[i__1].r, abs(d__1)) + (d__2 = d_imag(&d__[i__]), abs( + d__2)) >= (d__3 = dl[i__2].r, abs(d__3)) + (d__4 = d_imag(&dl[ + i__]), abs(d__4))) { + i__1 = i__; + if ((d__1 = d__[i__1].r, abs(d__1)) + (d__2 = d_imag(&d__[i__]), + abs(d__2)) != 0.) { + z_div(&z__1, &dl[i__], &d__[i__]); + fact.r = z__1.r, fact.i = z__1.i; + i__1 = i__; + dl[i__1].r = fact.r, dl[i__1].i = fact.i; + i__1 = i__ + 1; + i__2 = i__ + 1; + i__3 = i__; + z__2.r = fact.r * du[i__3].r - fact.i * du[i__3].i, z__2.i = + fact.r * du[i__3].i + fact.i * du[i__3].r; + z__1.r = d__[i__2].r - z__2.r, z__1.i = d__[i__2].i - z__2.i; + d__[i__1].r = z__1.r, d__[i__1].i = z__1.i; + } + } else { + z_div(&z__1, &d__[i__], &dl[i__]); + fact.r = z__1.r, fact.i = z__1.i; + i__1 = i__; + i__2 = i__; + d__[i__1].r = dl[i__2].r, d__[i__1].i = dl[i__2].i; + i__1 = i__; + dl[i__1].r = fact.r, dl[i__1].i = fact.i; + i__1 = i__; + temp.r = du[i__1].r, temp.i = du[i__1].i; + i__1 = i__; + i__2 = i__ + 1; + du[i__1].r = d__[i__2].r, du[i__1].i = d__[i__2].i; + i__1 = i__ + 1; + i__2 = i__ + 1; + z__2.r = fact.r * d__[i__2].r - fact.i * d__[i__2].i, z__2.i = + fact.r * d__[i__2].i + fact.i * d__[i__2].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + d__[i__1].r = z__1.r, d__[i__1].i = z__1.i; + ipiv[i__] = i__ + 1; + } + } + +/* Check for a zero on the diagonal of U. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + if ((d__1 = d__[i__2].r, abs(d__1)) + (d__2 = d_imag(&d__[i__]), abs( + d__2)) == 0.) { + *info = i__; + goto L50; + } +/* L40: */ + } +L50: + + return 0; + +/* End of ZGTTRF */ + +} /* zgttrf_ */ + diff --git a/lapack-netlib/SRC/zgttrs.c b/lapack-netlib/SRC/zgttrs.c new file mode 100644 index 000000000..34f2b4ff6 --- /dev/null +++ b/lapack-netlib/SRC/zgttrs.c @@ -0,0 +1,637 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGTTRS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGTTRS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDB, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGTTRS solves one of the systems of equations */ +/* > A * X = B, A**T * X = B, or A**H * X = B, */ +/* > with a tridiagonal matrix A using the LU factorization computed */ +/* > by ZGTTRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations. */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DL */ +/* > \verbatim */ +/* > DL is COMPLEX*16 array, dimension (N-1) */ +/* > The (n-1) multipliers that define the matrix L from the */ +/* > LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is COMPLEX*16 array, dimension (N) */ +/* > The n diagonal elements of the upper triangular matrix U from */ +/* > the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU */ +/* > \verbatim */ +/* > DU is COMPLEX*16 array, dimension (N-1) */ +/* > The (n-1) elements of the first super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU2 */ +/* > \verbatim */ +/* > DU2 is COMPLEX*16 array, dimension (N-2) */ +/* > The (n-2) elements of the second super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* > interchanged with row IPIV(i). IPIV(i) will always be either */ +/* > i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* > required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the matrix of right hand side vectors B. */ +/* > On exit, B is overwritten by the solution vectors X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GTcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zgttrs_(char *trans, integer *n, integer *nrhs, + doublecomplex *dl, doublecomplex *d__, doublecomplex *du, + doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, + integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer j, jb, nb; + extern /* Subroutine */ int zgtts2_(integer *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * + , integer *, doublecomplex *, integer *), xerbla_(char *, integer + *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer itrans; + logical notran; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --dl; + --d__; + --du; + --du2; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = *(unsigned char *)trans == 'N' || *(unsigned char *)trans == 'n'; + if (! notran && ! (*(unsigned char *)trans == 'T' || *(unsigned char *) + trans == 't') && ! (*(unsigned char *)trans == 'C' || *(unsigned + char *)trans == 'c')) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*ldb < f2cmax(*n,1)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGTTRS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + +/* Decode TRANS */ + + if (notran) { + itrans = 0; + } else if (*(unsigned char *)trans == 'T' || *(unsigned char *)trans == + 't') { + itrans = 1; + } else { + itrans = 2; + } + +/* Determine the number of right-hand sides to solve at a time. */ + + if (*nrhs == 1) { + nb = 1; + } else { +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, "ZGTTRS", trans, n, nrhs, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nb = f2cmax(i__1,i__2); + } + + if (nb >= *nrhs) { + zgtts2_(&itrans, n, nrhs, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[1], + &b[b_offset], ldb); + } else { + i__1 = *nrhs; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = *nrhs - j + 1; + jb = f2cmin(i__3,nb); + zgtts2_(&itrans, n, &jb, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[ + 1], &b[j * b_dim1 + 1], ldb); +/* L10: */ + } + } + +/* End of ZGTTRS */ + + return 0; +} /* zgttrs_ */ + diff --git a/lapack-netlib/SRC/zgtts2.c b/lapack-netlib/SRC/zgtts2.c new file mode 100644 index 000000000..5670eed48 --- /dev/null +++ b/lapack-netlib/SRC/zgtts2.c @@ -0,0 +1,1023 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZGTTS2 solves a system of linear equations with a tridiagonal matrix using the LU factorization + computed by sgttrf. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZGTTS2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) */ + +/* INTEGER ITRANS, LDB, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGTTS2 solves one of the systems of equations */ +/* > A * X = B, A**T * X = B, or A**H * X = B, */ +/* > with a tridiagonal matrix A using the LU factorization computed */ +/* > by ZGTTRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITRANS */ +/* > \verbatim */ +/* > ITRANS is INTEGER */ +/* > Specifies the form of the system of equations. */ +/* > = 0: A * X = B (No transpose) */ +/* > = 1: A**T * X = B (Transpose) */ +/* > = 2: A**H * X = B (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DL */ +/* > \verbatim */ +/* > DL is COMPLEX*16 array, dimension (N-1) */ +/* > The (n-1) multipliers that define the matrix L from the */ +/* > LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is COMPLEX*16 array, dimension (N) */ +/* > The n diagonal elements of the upper triangular matrix U from */ +/* > the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU */ +/* > \verbatim */ +/* > DU is COMPLEX*16 array, dimension (N-1) */ +/* > The (n-1) elements of the first super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU2 */ +/* > \verbatim */ +/* > DU2 is COMPLEX*16 array, dimension (N-2) */ +/* > The (n-2) elements of the second super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* > interchanged with row IPIV(i). IPIV(i) will always be either */ +/* > i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* > required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the matrix of right hand side vectors B. */ +/* > On exit, B is overwritten by the solution vectors X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16GTcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zgtts2_(integer *itrans, integer *n, integer *nrhs, + doublecomplex *dl, doublecomplex *d__, doublecomplex *du, + doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6, z__7, z__8; + + /* Local variables */ + doublecomplex temp; + integer i__, j; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Quick return if possible */ + + /* Parameter adjustments */ + --dl; + --d__; + --du; + --du2; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (*itrans == 0) { + +/* Solve A*X = B using the LU factorization of A, */ +/* overwriting each right hand side vector with its solution. */ + + if (*nrhs <= 1) { + j = 1; +L10: + +/* Solve L*x = b. */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (ipiv[i__] == i__) { + i__2 = i__ + 1 + j * b_dim1; + i__3 = i__ + 1 + j * b_dim1; + i__4 = i__; + i__5 = i__ + j * b_dim1; + z__2.r = dl[i__4].r * b[i__5].r - dl[i__4].i * b[i__5].i, + z__2.i = dl[i__4].r * b[i__5].i + dl[i__4].i * b[ + i__5].r; + z__1.r = b[i__3].r - z__2.r, z__1.i = b[i__3].i - z__2.i; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } else { + i__2 = i__ + j * b_dim1; + temp.r = b[i__2].r, temp.i = b[i__2].i; + i__2 = i__ + j * b_dim1; + i__3 = i__ + 1 + j * b_dim1; + b[i__2].r = b[i__3].r, b[i__2].i = b[i__3].i; + i__2 = i__ + 1 + j * b_dim1; + i__3 = i__; + i__4 = i__ + j * b_dim1; + z__2.r = dl[i__3].r * b[i__4].r - dl[i__3].i * b[i__4].i, + z__2.i = dl[i__3].r * b[i__4].i + dl[i__3].i * b[ + i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } +/* L20: */ + } + +/* Solve U*x = b. */ + + i__1 = *n + j * b_dim1; + z_div(&z__1, &b[*n + j * b_dim1], &d__[*n]); + b[i__1].r = z__1.r, b[i__1].i = z__1.i; + if (*n > 1) { + i__1 = *n - 1 + j * b_dim1; + i__2 = *n - 1 + j * b_dim1; + i__3 = *n - 1; + i__4 = *n + j * b_dim1; + z__3.r = du[i__3].r * b[i__4].r - du[i__3].i * b[i__4].i, + z__3.i = du[i__3].r * b[i__4].i + du[i__3].i * b[i__4] + .r; + z__2.r = b[i__2].r - z__3.r, z__2.i = b[i__2].i - z__3.i; + z_div(&z__1, &z__2, &d__[*n - 1]); + b[i__1].r = z__1.r, b[i__1].i = z__1.i; + } + for (i__ = *n - 2; i__ >= 1; --i__) { + i__1 = i__ + j * b_dim1; + i__2 = i__ + j * b_dim1; + i__3 = i__; + i__4 = i__ + 1 + j * b_dim1; + z__4.r = du[i__3].r * b[i__4].r - du[i__3].i * b[i__4].i, + z__4.i = du[i__3].r * b[i__4].i + du[i__3].i * b[i__4] + .r; + z__3.r = b[i__2].r - z__4.r, z__3.i = b[i__2].i - z__4.i; + i__5 = i__; + i__6 = i__ + 2 + j * b_dim1; + z__5.r = du2[i__5].r * b[i__6].r - du2[i__5].i * b[i__6].i, + z__5.i = du2[i__5].r * b[i__6].i + du2[i__5].i * b[ + i__6].r; + z__2.r = z__3.r - z__5.r, z__2.i = z__3.i - z__5.i; + z_div(&z__1, &z__2, &d__[i__]); + b[i__1].r = z__1.r, b[i__1].i = z__1.i; +/* L30: */ + } + if (j < *nrhs) { + ++j; + goto L10; + } + } else { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Solve L*x = b. */ + + i__2 = *n - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (ipiv[i__] == i__) { + i__3 = i__ + 1 + j * b_dim1; + i__4 = i__ + 1 + j * b_dim1; + i__5 = i__; + i__6 = i__ + j * b_dim1; + z__2.r = dl[i__5].r * b[i__6].r - dl[i__5].i * b[i__6] + .i, z__2.i = dl[i__5].r * b[i__6].i + dl[i__5] + .i * b[i__6].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4].i - + z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } else { + i__3 = i__ + j * b_dim1; + temp.r = b[i__3].r, temp.i = b[i__3].i; + i__3 = i__ + j * b_dim1; + i__4 = i__ + 1 + j * b_dim1; + b[i__3].r = b[i__4].r, b[i__3].i = b[i__4].i; + i__3 = i__ + 1 + j * b_dim1; + i__4 = i__; + i__5 = i__ + j * b_dim1; + z__2.r = dl[i__4].r * b[i__5].r - dl[i__4].i * b[i__5] + .i, z__2.i = dl[i__4].r * b[i__5].i + dl[i__4] + .i * b[i__5].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } +/* L40: */ + } + +/* Solve U*x = b. */ + + i__2 = *n + j * b_dim1; + z_div(&z__1, &b[*n + j * b_dim1], &d__[*n]); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + if (*n > 1) { + i__2 = *n - 1 + j * b_dim1; + i__3 = *n - 1 + j * b_dim1; + i__4 = *n - 1; + i__5 = *n + j * b_dim1; + z__3.r = du[i__4].r * b[i__5].r - du[i__4].i * b[i__5].i, + z__3.i = du[i__4].r * b[i__5].i + du[i__4].i * b[ + i__5].r; + z__2.r = b[i__3].r - z__3.r, z__2.i = b[i__3].i - z__3.i; + z_div(&z__1, &z__2, &d__[*n - 1]); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + for (i__ = *n - 2; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + i__4 = i__; + i__5 = i__ + 1 + j * b_dim1; + z__4.r = du[i__4].r * b[i__5].r - du[i__4].i * b[i__5].i, + z__4.i = du[i__4].r * b[i__5].i + du[i__4].i * b[ + i__5].r; + z__3.r = b[i__3].r - z__4.r, z__3.i = b[i__3].i - z__4.i; + i__6 = i__; + i__7 = i__ + 2 + j * b_dim1; + z__5.r = du2[i__6].r * b[i__7].r - du2[i__6].i * b[i__7] + .i, z__5.i = du2[i__6].r * b[i__7].i + du2[i__6] + .i * b[i__7].r; + z__2.r = z__3.r - z__5.r, z__2.i = z__3.i - z__5.i; + z_div(&z__1, &z__2, &d__[i__]); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L50: */ + } +/* L60: */ + } + } + } else if (*itrans == 1) { + +/* Solve A**T * X = B. */ + + if (*nrhs <= 1) { + j = 1; +L70: + +/* Solve U**T * x = b. */ + + i__1 = j * b_dim1 + 1; + z_div(&z__1, &b[j * b_dim1 + 1], &d__[1]); + b[i__1].r = z__1.r, b[i__1].i = z__1.i; + if (*n > 1) { + i__1 = j * b_dim1 + 2; + i__2 = j * b_dim1 + 2; + i__3 = j * b_dim1 + 1; + z__3.r = du[1].r * b[i__3].r - du[1].i * b[i__3].i, z__3.i = + du[1].r * b[i__3].i + du[1].i * b[i__3].r; + z__2.r = b[i__2].r - z__3.r, z__2.i = b[i__2].i - z__3.i; + z_div(&z__1, &z__2, &d__[2]); + b[i__1].r = z__1.r, b[i__1].i = z__1.i; + } + i__1 = *n; + for (i__ = 3; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + i__4 = i__ - 1; + i__5 = i__ - 1 + j * b_dim1; + z__4.r = du[i__4].r * b[i__5].r - du[i__4].i * b[i__5].i, + z__4.i = du[i__4].r * b[i__5].i + du[i__4].i * b[i__5] + .r; + z__3.r = b[i__3].r - z__4.r, z__3.i = b[i__3].i - z__4.i; + i__6 = i__ - 2; + i__7 = i__ - 2 + j * b_dim1; + z__5.r = du2[i__6].r * b[i__7].r - du2[i__6].i * b[i__7].i, + z__5.i = du2[i__6].r * b[i__7].i + du2[i__6].i * b[ + i__7].r; + z__2.r = z__3.r - z__5.r, z__2.i = z__3.i - z__5.i; + z_div(&z__1, &z__2, &d__[i__]); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L80: */ + } + +/* Solve L**T * x = b. */ + + for (i__ = *n - 1; i__ >= 1; --i__) { + if (ipiv[i__] == i__) { + i__1 = i__ + j * b_dim1; + i__2 = i__ + j * b_dim1; + i__3 = i__; + i__4 = i__ + 1 + j * b_dim1; + z__2.r = dl[i__3].r * b[i__4].r - dl[i__3].i * b[i__4].i, + z__2.i = dl[i__3].r * b[i__4].i + dl[i__3].i * b[ + i__4].r; + z__1.r = b[i__2].r - z__2.r, z__1.i = b[i__2].i - z__2.i; + b[i__1].r = z__1.r, b[i__1].i = z__1.i; + } else { + i__1 = i__ + 1 + j * b_dim1; + temp.r = b[i__1].r, temp.i = b[i__1].i; + i__1 = i__ + 1 + j * b_dim1; + i__2 = i__ + j * b_dim1; + i__3 = i__; + z__2.r = dl[i__3].r * temp.r - dl[i__3].i * temp.i, + z__2.i = dl[i__3].r * temp.i + dl[i__3].i * + temp.r; + z__1.r = b[i__2].r - z__2.r, z__1.i = b[i__2].i - z__2.i; + b[i__1].r = z__1.r, b[i__1].i = z__1.i; + i__1 = i__ + j * b_dim1; + b[i__1].r = temp.r, b[i__1].i = temp.i; + } +/* L90: */ + } + if (j < *nrhs) { + ++j; + goto L70; + } + } else { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Solve U**T * x = b. */ + + i__2 = j * b_dim1 + 1; + z_div(&z__1, &b[j * b_dim1 + 1], &d__[1]); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + if (*n > 1) { + i__2 = j * b_dim1 + 2; + i__3 = j * b_dim1 + 2; + i__4 = j * b_dim1 + 1; + z__3.r = du[1].r * b[i__4].r - du[1].i * b[i__4].i, + z__3.i = du[1].r * b[i__4].i + du[1].i * b[i__4] + .r; + z__2.r = b[i__3].r - z__3.r, z__2.i = b[i__3].i - z__3.i; + z_div(&z__1, &z__2, &d__[2]); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + i__2 = *n; + for (i__ = 3; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ - 1; + i__6 = i__ - 1 + j * b_dim1; + z__4.r = du[i__5].r * b[i__6].r - du[i__5].i * b[i__6].i, + z__4.i = du[i__5].r * b[i__6].i + du[i__5].i * b[ + i__6].r; + z__3.r = b[i__4].r - z__4.r, z__3.i = b[i__4].i - z__4.i; + i__7 = i__ - 2; + i__8 = i__ - 2 + j * b_dim1; + z__5.r = du2[i__7].r * b[i__8].r - du2[i__7].i * b[i__8] + .i, z__5.i = du2[i__7].r * b[i__8].i + du2[i__7] + .i * b[i__8].r; + z__2.r = z__3.r - z__5.r, z__2.i = z__3.i - z__5.i; + z_div(&z__1, &z__2, &d__[i__]); + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L100: */ + } + +/* Solve L**T * x = b. */ + + for (i__ = *n - 1; i__ >= 1; --i__) { + if (ipiv[i__] == i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + i__4 = i__; + i__5 = i__ + 1 + j * b_dim1; + z__2.r = dl[i__4].r * b[i__5].r - dl[i__4].i * b[i__5] + .i, z__2.i = dl[i__4].r * b[i__5].i + dl[i__4] + .i * b[i__5].r; + z__1.r = b[i__3].r - z__2.r, z__1.i = b[i__3].i - + z__2.i; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } else { + i__2 = i__ + 1 + j * b_dim1; + temp.r = b[i__2].r, temp.i = b[i__2].i; + i__2 = i__ + 1 + j * b_dim1; + i__3 = i__ + j * b_dim1; + i__4 = i__; + z__2.r = dl[i__4].r * temp.r - dl[i__4].i * temp.i, + z__2.i = dl[i__4].r * temp.i + dl[i__4].i * + temp.r; + z__1.r = b[i__3].r - z__2.r, z__1.i = b[i__3].i - + z__2.i; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + i__2 = i__ + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; + } +/* L110: */ + } +/* L120: */ + } + } + } else { + +/* Solve A**H * X = B. */ + + if (*nrhs <= 1) { + j = 1; +L130: + +/* Solve U**H * x = b. */ + + i__1 = j * b_dim1 + 1; + d_cnjg(&z__2, &d__[1]); + z_div(&z__1, &b[j * b_dim1 + 1], &z__2); + b[i__1].r = z__1.r, b[i__1].i = z__1.i; + if (*n > 1) { + i__1 = j * b_dim1 + 2; + i__2 = j * b_dim1 + 2; + d_cnjg(&z__4, &du[1]); + i__3 = j * b_dim1 + 1; + z__3.r = z__4.r * b[i__3].r - z__4.i * b[i__3].i, z__3.i = + z__4.r * b[i__3].i + z__4.i * b[i__3].r; + z__2.r = b[i__2].r - z__3.r, z__2.i = b[i__2].i - z__3.i; + d_cnjg(&z__5, &d__[2]); + z_div(&z__1, &z__2, &z__5); + b[i__1].r = z__1.r, b[i__1].i = z__1.i; + } + i__1 = *n; + for (i__ = 3; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + d_cnjg(&z__5, &du[i__ - 1]); + i__4 = i__ - 1 + j * b_dim1; + z__4.r = z__5.r * b[i__4].r - z__5.i * b[i__4].i, z__4.i = + z__5.r * b[i__4].i + z__5.i * b[i__4].r; + z__3.r = b[i__3].r - z__4.r, z__3.i = b[i__3].i - z__4.i; + d_cnjg(&z__7, &du2[i__ - 2]); + i__5 = i__ - 2 + j * b_dim1; + z__6.r = z__7.r * b[i__5].r - z__7.i * b[i__5].i, z__6.i = + z__7.r * b[i__5].i + z__7.i * b[i__5].r; + z__2.r = z__3.r - z__6.r, z__2.i = z__3.i - z__6.i; + d_cnjg(&z__8, &d__[i__]); + z_div(&z__1, &z__2, &z__8); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L140: */ + } + +/* Solve L**H * x = b. */ + + for (i__ = *n - 1; i__ >= 1; --i__) { + if (ipiv[i__] == i__) { + i__1 = i__ + j * b_dim1; + i__2 = i__ + j * b_dim1; + d_cnjg(&z__3, &dl[i__]); + i__3 = i__ + 1 + j * b_dim1; + z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3].i, z__2.i = + z__3.r * b[i__3].i + z__3.i * b[i__3].r; + z__1.r = b[i__2].r - z__2.r, z__1.i = b[i__2].i - z__2.i; + b[i__1].r = z__1.r, b[i__1].i = z__1.i; + } else { + i__1 = i__ + 1 + j * b_dim1; + temp.r = b[i__1].r, temp.i = b[i__1].i; + i__1 = i__ + 1 + j * b_dim1; + i__2 = i__ + j * b_dim1; + d_cnjg(&z__3, &dl[i__]); + z__2.r = z__3.r * temp.r - z__3.i * temp.i, z__2.i = + z__3.r * temp.i + z__3.i * temp.r; + z__1.r = b[i__2].r - z__2.r, z__1.i = b[i__2].i - z__2.i; + b[i__1].r = z__1.r, b[i__1].i = z__1.i; + i__1 = i__ + j * b_dim1; + b[i__1].r = temp.r, b[i__1].i = temp.i; + } +/* L150: */ + } + if (j < *nrhs) { + ++j; + goto L130; + } + } else { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Solve U**H * x = b. */ + + i__2 = j * b_dim1 + 1; + d_cnjg(&z__2, &d__[1]); + z_div(&z__1, &b[j * b_dim1 + 1], &z__2); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + if (*n > 1) { + i__2 = j * b_dim1 + 2; + i__3 = j * b_dim1 + 2; + d_cnjg(&z__4, &du[1]); + i__4 = j * b_dim1 + 1; + z__3.r = z__4.r * b[i__4].r - z__4.i * b[i__4].i, z__3.i = + z__4.r * b[i__4].i + z__4.i * b[i__4].r; + z__2.r = b[i__3].r - z__3.r, z__2.i = b[i__3].i - z__3.i; + d_cnjg(&z__5, &d__[2]); + z_div(&z__1, &z__2, &z__5); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + i__2 = *n; + for (i__ = 3; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + d_cnjg(&z__5, &du[i__ - 1]); + i__5 = i__ - 1 + j * b_dim1; + z__4.r = z__5.r * b[i__5].r - z__5.i * b[i__5].i, z__4.i = + z__5.r * b[i__5].i + z__5.i * b[i__5].r; + z__3.r = b[i__4].r - z__4.r, z__3.i = b[i__4].i - z__4.i; + d_cnjg(&z__7, &du2[i__ - 2]); + i__6 = i__ - 2 + j * b_dim1; + z__6.r = z__7.r * b[i__6].r - z__7.i * b[i__6].i, z__6.i = + z__7.r * b[i__6].i + z__7.i * b[i__6].r; + z__2.r = z__3.r - z__6.r, z__2.i = z__3.i - z__6.i; + d_cnjg(&z__8, &d__[i__]); + z_div(&z__1, &z__2, &z__8); + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L160: */ + } + +/* Solve L**H * x = b. */ + + for (i__ = *n - 1; i__ >= 1; --i__) { + if (ipiv[i__] == i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + d_cnjg(&z__3, &dl[i__]); + i__4 = i__ + 1 + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, + z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] + .r; + z__1.r = b[i__3].r - z__2.r, z__1.i = b[i__3].i - + z__2.i; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } else { + i__2 = i__ + 1 + j * b_dim1; + temp.r = b[i__2].r, temp.i = b[i__2].i; + i__2 = i__ + 1 + j * b_dim1; + i__3 = i__ + j * b_dim1; + d_cnjg(&z__3, &dl[i__]); + z__2.r = z__3.r * temp.r - z__3.i * temp.i, z__2.i = + z__3.r * temp.i + z__3.i * temp.r; + z__1.r = b[i__3].r - z__2.r, z__1.i = b[i__3].i - + z__2.i; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + i__2 = i__ + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; + } +/* L170: */ + } +/* L180: */ + } + } + } + +/* End of ZGTTS2 */ + + return 0; +} /* zgtts2_ */ + diff --git a/lapack-netlib/SRC/zhb2st_kernels.c b/lapack-netlib/SRC/zhb2st_kernels.c new file mode 100644 index 000000000..6b634cb40 --- /dev/null +++ b/lapack-netlib/SRC/zhb2st_kernels.c @@ -0,0 +1,802 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHB2ST_KERNELS */ + +/* @precisions fortran z -> s d c */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHB2ST_KERNELS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHB2ST_KERNELS( UPLO, WANTZ, TTYPE, */ +/* ST, ED, SWEEP, N, NB, IB, */ +/* A, LDA, V, TAU, LDVT, WORK) */ + +/* IMPLICIT NONE */ + +/* CHARACTER UPLO */ +/* LOGICAL WANTZ */ +/* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT */ +/* COMPLEX*16 A( LDA, * ), V( * ), */ +/* TAU( * ), WORK( * ) */ + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHB2ST_KERNELS is an internal routine used by the ZHETRD_HB2ST */ +/* > subroutine. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > \endverbatim */ +/* > */ +/* > \param[in] WANTZ */ +/* > \verbatim */ +/* > WANTZ is LOGICAL which indicate if Eigenvalue are requested or both */ +/* > Eigenvalue/Eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TTYPE */ +/* > \verbatim */ +/* > TTYPE is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ST */ +/* > \verbatim */ +/* > ST is INTEGER */ +/* > internal parameter for indices. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ED */ +/* > \verbatim */ +/* > ED is INTEGER */ +/* > internal parameter for indices. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SWEEP */ +/* > \verbatim */ +/* > SWEEP is INTEGER */ +/* > internal parameter for indices. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER. The order of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NB */ +/* > \verbatim */ +/* > NB is INTEGER. The size of the band. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IB */ +/* > \verbatim */ +/* > IB is INTEGER. */ +/* > \endverbatim */ +/* > */ +/* > \param[in, out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array. A pointer to the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER. The leading dimension of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] V */ +/* > \verbatim */ +/* > V is COMPLEX*16 array, dimension 2*n if eigenvalues only are */ +/* > requested or to be queried for vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (2*n). */ +/* > The scalar factors of the Householder reflectors are stored */ +/* > in this array. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVT */ +/* > \verbatim */ +/* > LDVT is INTEGER. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array. Workspace of size nb. */ +/* > \endverbatim */ +/* > */ +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Implemented by Azzam Haidar. */ +/* > */ +/* > All details are available on technical report, SC11, SC13 papers. */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zhb2st_kernels_(char *uplo, logical *wantz, integer * + ttype, integer *st, integer *ed, integer *sweep, integer *n, integer * + nb, integer *ib, doublecomplex *a, integer *lda, doublecomplex *v, + doublecomplex *tau, integer *ldvt, doublecomplex *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + doublecomplex ctmp; + integer dpos, vpos, i__; + extern logical lsame_(char *, char *); + logical upper; + integer j1, j2, lm, ln, ajeter; + extern /* Subroutine */ int zlarfg_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *); + integer ofdpos; + extern /* Subroutine */ int zlarfx_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *), zlarfy_(char *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *); + integer taupos; + + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --v; + --tau; + --work; + + /* Function Body */ + ajeter = *ib + *ldvt; + upper = lsame_(uplo, "U"); + if (upper) { + dpos = (*nb << 1) + 1; + ofdpos = *nb << 1; + } else { + dpos = 1; + ofdpos = 2; + } + +/* Upper case */ + + if (upper) { + + if (*wantz) { + vpos = (*sweep - 1) % 2 * *n + *st; + taupos = (*sweep - 1) % 2 * *n + *st; + } else { + vpos = (*sweep - 1) % 2 * *n + *st; + taupos = (*sweep - 1) % 2 * *n + *st; + } + + if (*ttype == 1) { + lm = *ed - *st + 1; + + i__1 = vpos; + v[i__1].r = 1., v[i__1].i = 0.; + i__1 = lm - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = vpos + i__; + d_cnjg(&z__1, &a[ofdpos - i__ + (*st + i__) * a_dim1]); + v[i__2].r = z__1.r, v[i__2].i = z__1.i; + i__2 = ofdpos - i__ + (*st + i__) * a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L10: */ + } + d_cnjg(&z__1, &a[ofdpos + *st * a_dim1]); + ctmp.r = z__1.r, ctmp.i = z__1.i; + zlarfg_(&lm, &ctmp, &v[vpos + 1], &c__1, &tau[taupos]); + i__1 = ofdpos + *st * a_dim1; + a[i__1].r = ctmp.r, a[i__1].i = ctmp.i; + + lm = *ed - *st + 1; + d_cnjg(&z__1, &tau[taupos]); + i__1 = *lda - 1; + zlarfy_(uplo, &lm, &v[vpos], &c__1, &z__1, &a[dpos + *st * a_dim1] + , &i__1, &work[1]); + } + + if (*ttype == 3) { + + lm = *ed - *st + 1; + d_cnjg(&z__1, &tau[taupos]); + i__1 = *lda - 1; + zlarfy_(uplo, &lm, &v[vpos], &c__1, &z__1, &a[dpos + *st * a_dim1] + , &i__1, &work[1]); + } + + if (*ttype == 2) { + j1 = *ed + 1; +/* Computing MIN */ + i__1 = *ed + *nb; + j2 = f2cmin(i__1,*n); + ln = *ed - *st + 1; + lm = j2 - j1 + 1; + if (lm > 0) { + d_cnjg(&z__1, &tau[taupos]); + i__1 = *lda - 1; + zlarfx_("Left", &ln, &lm, &v[vpos], &z__1, &a[dpos - *nb + j1 + * a_dim1], &i__1, &work[1]); + + if (*wantz) { + vpos = (*sweep - 1) % 2 * *n + j1; + taupos = (*sweep - 1) % 2 * *n + j1; + } else { + vpos = (*sweep - 1) % 2 * *n + j1; + taupos = (*sweep - 1) % 2 * *n + j1; + } + + i__1 = vpos; + v[i__1].r = 1., v[i__1].i = 0.; + i__1 = lm - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = vpos + i__; + d_cnjg(&z__1, &a[dpos - *nb - i__ + (j1 + i__) * a_dim1]); + v[i__2].r = z__1.r, v[i__2].i = z__1.i; + i__2 = dpos - *nb - i__ + (j1 + i__) * a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L30: */ + } + d_cnjg(&z__1, &a[dpos - *nb + j1 * a_dim1]); + ctmp.r = z__1.r, ctmp.i = z__1.i; + zlarfg_(&lm, &ctmp, &v[vpos + 1], &c__1, &tau[taupos]); + i__1 = dpos - *nb + j1 * a_dim1; + a[i__1].r = ctmp.r, a[i__1].i = ctmp.i; + + i__1 = ln - 1; + i__2 = *lda - 1; + zlarfx_("Right", &i__1, &lm, &v[vpos], &tau[taupos], &a[dpos + - *nb + 1 + j1 * a_dim1], &i__2, &work[1]); + } + } + +/* Lower case */ + + } else { + + if (*wantz) { + vpos = (*sweep - 1) % 2 * *n + *st; + taupos = (*sweep - 1) % 2 * *n + *st; + } else { + vpos = (*sweep - 1) % 2 * *n + *st; + taupos = (*sweep - 1) % 2 * *n + *st; + } + + if (*ttype == 1) { + lm = *ed - *st + 1; + + i__1 = vpos; + v[i__1].r = 1., v[i__1].i = 0.; + i__1 = lm - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = vpos + i__; + i__3 = ofdpos + i__ + (*st - 1) * a_dim1; + v[i__2].r = a[i__3].r, v[i__2].i = a[i__3].i; + i__2 = ofdpos + i__ + (*st - 1) * a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L20: */ + } + zlarfg_(&lm, &a[ofdpos + (*st - 1) * a_dim1], &v[vpos + 1], &c__1, + &tau[taupos]); + + lm = *ed - *st + 1; + + d_cnjg(&z__1, &tau[taupos]); + i__1 = *lda - 1; + zlarfy_(uplo, &lm, &v[vpos], &c__1, &z__1, &a[dpos + *st * a_dim1] + , &i__1, &work[1]); + } + + if (*ttype == 3) { + lm = *ed - *st + 1; + + d_cnjg(&z__1, &tau[taupos]); + i__1 = *lda - 1; + zlarfy_(uplo, &lm, &v[vpos], &c__1, &z__1, &a[dpos + *st * a_dim1] + , &i__1, &work[1]); + } + + if (*ttype == 2) { + j1 = *ed + 1; +/* Computing MIN */ + i__1 = *ed + *nb; + j2 = f2cmin(i__1,*n); + ln = *ed - *st + 1; + lm = j2 - j1 + 1; + + if (lm > 0) { + i__1 = *lda - 1; + zlarfx_("Right", &lm, &ln, &v[vpos], &tau[taupos], &a[dpos + * + nb + *st * a_dim1], &i__1, &work[1]); + + if (*wantz) { + vpos = (*sweep - 1) % 2 * *n + j1; + taupos = (*sweep - 1) % 2 * *n + j1; + } else { + vpos = (*sweep - 1) % 2 * *n + j1; + taupos = (*sweep - 1) % 2 * *n + j1; + } + + i__1 = vpos; + v[i__1].r = 1., v[i__1].i = 0.; + i__1 = lm - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = vpos + i__; + i__3 = dpos + *nb + i__ + *st * a_dim1; + v[i__2].r = a[i__3].r, v[i__2].i = a[i__3].i; + i__2 = dpos + *nb + i__ + *st * a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L40: */ + } + zlarfg_(&lm, &a[dpos + *nb + *st * a_dim1], &v[vpos + 1], & + c__1, &tau[taupos]); + + i__1 = ln - 1; + d_cnjg(&z__1, &tau[taupos]); + i__2 = *lda - 1; + zlarfx_("Left", &lm, &i__1, &v[vpos], &z__1, &a[dpos + *nb - + 1 + (*st + 1) * a_dim1], &i__2, &work[1]); + } + } + } + + return 0; + +/* END OF ZHB2ST_KERNELS */ + +} /* zhb2st_kernels__ */ + diff --git a/lapack-netlib/SRC/zhbev.c b/lapack-netlib/SRC/zhbev.c new file mode 100644 index 000000000..10efa6c29 --- /dev/null +++ b/lapack-netlib/SRC/zhbev.c @@ -0,0 +1,715 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHBEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER m +atrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHBEV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, */ +/* RWORK, INFO ) */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, KD, LDAB, LDZ, N */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHBEV computes all the eigenvalues and, optionally, eigenvectors of */ +/* > a complex Hermitian band matrix A. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX*16 array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > */ +/* > On exit, AB is overwritten by values generated during the */ +/* > reduction to tridiagonal form. If UPLO = 'U', the first */ +/* > superdiagonal and the diagonal of the tridiagonal matrix T */ +/* > are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */ +/* > the diagonal and first subdiagonal of T are returned in the */ +/* > first two rows of AB. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD + 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, N) */ +/* > If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ +/* > eigenvectors of the matrix A, with the i-th column of Z */ +/* > holding the eigenvector associated with W(i). */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (f2cmax(1,3*N-2)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, the algorithm failed to converge; i */ +/* > off-diagonal elements of an intermediate tridiagonal */ +/* > form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16OTHEReigen */ + +/* ===================================================================== */ +/* Subroutine */ int zhbev_(char *jobz, char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *w, doublecomplex *z__, + integer *ldz, doublecomplex *work, doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, z_dim1, z_offset, i__1; + doublereal d__1; + + /* Local variables */ + integer inde; + doublereal anrm; + integer imax; + doublereal rmin, rmax; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical lsame_(char *, char *); + integer iinfo; + logical lower, wantz; + extern doublereal dlamch_(char *); + integer iscale; + doublereal safmin; + extern doublereal zlanhb_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *), zhbtrd_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, integer *, doublecomplex *, integer *); + integer indrwk; + doublereal smlnum; + extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *); + doublereal eps; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kd < 0) { + *info = -4; + } else if (*ldab < *kd + 1) { + *info = -6; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -9; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHBEV ", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (lower) { + i__1 = ab_dim1 + 1; + w[1] = ab[i__1].r; + } else { + i__1 = *kd + 1 + ab_dim1; + w[1] = ab[i__1].r; + } + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_("Safe minimum"); + eps = dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = zlanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + zlascl_("B", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, + info); + } else { + zlascl_("Q", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, + info); + } + } + +/* Call ZHBTRD to reduce Hermitian band matrix to tridiagonal form. */ + + inde = 1; + zhbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &rwork[inde], & + z__[z_offset], ldz, &work[1], &iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEQR. */ + + if (! wantz) { + dsterf_(n, &w[1], &rwork[inde], info); + } else { + indrwk = inde + *n; + zsteqr_(jobz, n, &w[1], &rwork[inde], &z__[z_offset], ldz, &rwork[ + indrwk], info); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + + return 0; + +/* End of ZHBEV */ + +} /* zhbev_ */ + diff --git a/lapack-netlib/SRC/zhbev_2stage.c b/lapack-netlib/SRC/zhbev_2stage.c new file mode 100644 index 000000000..27dcca395 --- /dev/null +++ b/lapack-netlib/SRC/zhbev_2stage.c @@ -0,0 +1,821 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHBEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for +OTHER matrices */ + +/* @precisions fortran z -> s d c */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHBEV_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, */ +/* WORK, LWORK, RWORK, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, KD, LDAB, LDZ, N, LWORK */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHBEV_2STAGE computes all the eigenvalues and, optionally, eigenvectors of */ +/* > a complex Hermitian band matrix A using the 2stage technique for */ +/* > the reduction to tridiagonal. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > Not available in this release. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX*16 array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > */ +/* > On exit, AB is overwritten by values generated during the */ +/* > reduction to tridiagonal form. If UPLO = 'U', the first */ +/* > superdiagonal and the diagonal of the tridiagonal matrix T */ +/* > are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */ +/* > the diagonal and first subdiagonal of T are returned in the */ +/* > first two rows of AB. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD + 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, N) */ +/* > If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ +/* > eigenvectors of the matrix A, with the i-th column of Z */ +/* > holding the eigenvector associated with W(i). */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension LWORK */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1, when N <= 1; */ +/* > otherwise */ +/* > If JOBZ = 'N' and N > 1, LWORK must be queried. */ +/* > LWORK = MAX(1, dimension) where */ +/* > dimension = (2KD+1)*N + KD*NTHREADS */ +/* > where KD is the size of the band. */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (f2cmax(1,3*N-2)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, the algorithm failed to converge; i */ +/* > off-diagonal elements of an intermediate tridiagonal */ +/* > form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complex16OTHEReigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > All details about the 2stage techniques are available in: */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int zhbev_2stage_(char *jobz, char *uplo, integer *n, + integer *kd, doublecomplex *ab, integer *ldab, doublereal *w, + doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, + doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, z_dim1, z_offset, i__1; + doublereal d__1; + + /* Local variables */ + integer inde; + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + doublereal anrm; + integer imax; + extern /* Subroutine */ int zhetrd_hb2st_(char *, char *, char *, + integer *, integer *, doublecomplex *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *); + doublereal rmin, rmax; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical lsame_(char *, char *); + integer iinfo, lhtrd, lwmin; + logical lower; + integer lwtrd; + logical wantz; + integer ib; + extern doublereal dlamch_(char *); + integer iscale; + doublereal safmin; + extern doublereal zlanhb_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *); + integer indwrk, indrwk, llwork; + doublereal smlnum; + logical lquery; + extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *); + doublereal eps; + integer indhous; + + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1; + + *info = 0; + if (! lsame_(jobz, "N")) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kd < 0) { + *info = -4; + } else if (*ldab < *kd + 1) { + *info = -6; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -9; + } + + if (*info == 0) { + if (*n <= 1) { + lwmin = 1; + work[1].r = (doublereal) lwmin, work[1].i = 0.; + } else { + ib = ilaenv2stage_(&c__2, "ZHETRD_HB2ST", jobz, n, kd, &c_n1, & + c_n1); + lhtrd = ilaenv2stage_(&c__3, "ZHETRD_HB2ST", jobz, n, kd, &ib, & + c_n1); + lwtrd = ilaenv2stage_(&c__4, "ZHETRD_HB2ST", jobz, n, kd, &ib, & + c_n1); + lwmin = lhtrd + lwtrd; + work[1].r = (doublereal) lwmin, work[1].i = 0.; + } + + if (*lwork < lwmin && ! lquery) { + *info = -11; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHBEV_2STAGE ", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (lower) { + i__1 = ab_dim1 + 1; + w[1] = ab[i__1].r; + } else { + i__1 = *kd + 1 + ab_dim1; + w[1] = ab[i__1].r; + } + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_("Safe minimum"); + eps = dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = zlanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + zlascl_("B", kd, kd, &c_b21, &sigma, n, n, &ab[ab_offset], ldab, + info); + } else { + zlascl_("Q", kd, kd, &c_b21, &sigma, n, n, &ab[ab_offset], ldab, + info); + } + } + +/* Call ZHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. */ + + inde = 1; + indhous = 1; + indwrk = indhous + lhtrd; + llwork = *lwork - indwrk + 1; + + zhetrd_hb2st_("N", jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], & + rwork[inde], &work[indhous], &lhtrd, &work[indwrk], &llwork, & + iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEQR. */ + + if (! wantz) { + dsterf_(n, &w[1], &rwork[inde], info); + } else { + indrwk = inde + *n; + zsteqr_(jobz, n, &w[1], &rwork[inde], &z__[z_offset], ldz, &rwork[ + indrwk], info); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* Set WORK(1) to optimal workspace size. */ + + work[1].r = (doublereal) lwmin, work[1].i = 0.; + + return 0; + +/* End of ZHBEV_2STAGE */ + +} /* zhbev_2stage__ */ + diff --git a/lapack-netlib/SRC/zhbevd.c b/lapack-netlib/SRC/zhbevd.c new file mode 100644 index 000000000..c9a2b76d2 --- /dev/null +++ b/lapack-netlib/SRC/zhbevd.c @@ -0,0 +1,836 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHBEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER +matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHBEVD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, */ +/* LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHBEVD computes all the eigenvalues and, optionally, eigenvectors of */ +/* > a complex Hermitian band matrix A. If eigenvectors are desired, it */ +/* > uses a divide and conquer algorithm. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX*16 array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > */ +/* > On exit, AB is overwritten by values generated during the */ +/* > reduction to tridiagonal form. If UPLO = 'U', the first */ +/* > superdiagonal and the diagonal of the tridiagonal matrix T */ +/* > are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */ +/* > the diagonal and first subdiagonal of T are returned in the */ +/* > first two rows of AB. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD + 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, N) */ +/* > If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ +/* > eigenvectors of the matrix A, with the i-th column of Z */ +/* > holding the eigenvector associated with W(i). */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If N <= 1, LWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LWORK must be at least N. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be at least 2*N**2. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, */ +/* > dimension (LRWORK) */ +/* > On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The dimension of array RWORK. */ +/* > If N <= 1, LRWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LRWORK must be at least N. */ +/* > If JOBZ = 'V' and N > 1, LRWORK must be at least */ +/* > 1 + 5*N + 2*N**2. */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of array IWORK. */ +/* > If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. */ +/* > If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, the algorithm failed to converge; i */ +/* > off-diagonal elements of an intermediate tridiagonal */ +/* > form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16OTHEReigen */ + +/* ===================================================================== */ +/* Subroutine */ int zhbevd_(char *jobz, char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *w, doublecomplex *z__, + integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *lrwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, z_dim1, z_offset, i__1; + doublereal d__1; + + /* Local variables */ + integer inde; + doublereal anrm; + integer imax; + doublereal rmin, rmax; + integer llwk2; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + integer lwmin; + logical lower; + integer llrwk; + logical wantz; + integer indwk2; + extern doublereal dlamch_(char *); + integer iscale; + doublereal safmin; + extern doublereal zlanhb_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *), zstedc_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublecomplex *, + integer *, doublereal *, integer *, integer *, integer *, integer + *), zhbtrd_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, integer *, doublecomplex *, integer *); + integer indwrk, liwmin; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + integer lrwmin; + doublereal smlnum; + logical lquery; + doublereal eps; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *liwork == -1 || *lrwork == -1; + + *info = 0; + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + } else { + if (wantz) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = i__1 * i__1 << 1; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { + lwmin = *n; + lrwmin = *n; + liwmin = 1; + } + } + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kd < 0) { + *info = -4; + } else if (*ldab < *kd + 1) { + *info = -6; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -9; + } + + if (*info == 0) { + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -11; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -13; + } else if (*liwork < liwmin && ! lquery) { + *info = -15; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHBEVD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = ab_dim1 + 1; + w[1] = ab[i__1].r; + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_("Safe minimum"); + eps = dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = zlanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + zlascl_("B", kd, kd, &c_b13, &sigma, n, n, &ab[ab_offset], ldab, + info); + } else { + zlascl_("Q", kd, kd, &c_b13, &sigma, n, n, &ab[ab_offset], ldab, + info); + } + } + +/* Call ZHBTRD to reduce Hermitian band matrix to tridiagonal form. */ + + inde = 1; + indwrk = inde + *n; + indwk2 = *n * *n + 1; + llwk2 = *lwork - indwk2 + 1; + llrwk = *lrwork - indwrk + 1; + zhbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &rwork[inde], & + z__[z_offset], ldz, &work[1], &iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEDC. */ + + if (! wantz) { + dsterf_(n, &w[1], &rwork[inde], info); + } else { + zstedc_("I", n, &w[1], &rwork[inde], &work[1], n, &work[indwk2], & + llwk2, &rwork[indwrk], &llrwk, &iwork[1], liwork, info); + zgemm_("N", "N", n, n, n, &c_b2, &z__[z_offset], ldz, &work[1], n, & + c_b1, &work[indwk2], n); + zlacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + return 0; + +/* End of ZHBEVD */ + +} /* zhbevd_ */ + diff --git a/lapack-netlib/SRC/zhbevd_2stage.c b/lapack-netlib/SRC/zhbevd_2stage.c new file mode 100644 index 000000000..983f41fbd --- /dev/null +++ b/lapack-netlib/SRC/zhbevd_2stage.c @@ -0,0 +1,901 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHBEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for + OTHER matrices */ + +/* @precisions fortran z -> s d c */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHBEVD_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, */ +/* WORK, LWORK, RWORK, LRWORK, IWORK, */ +/* LIWORK, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHBEVD_2STAGE computes all the eigenvalues and, optionally, eigenvectors of */ +/* > a complex Hermitian band matrix A using the 2stage technique for */ +/* > the reduction to tridiagonal. If eigenvectors are desired, it */ +/* > uses a divide and conquer algorithm. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > Not available in this release. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX*16 array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > */ +/* > On exit, AB is overwritten by values generated during the */ +/* > reduction to tridiagonal form. If UPLO = 'U', the first */ +/* > superdiagonal and the diagonal of the tridiagonal matrix T */ +/* > are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */ +/* > the diagonal and first subdiagonal of T are returned in the */ +/* > first two rows of AB. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD + 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, N) */ +/* > If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ +/* > eigenvectors of the matrix A, with the i-th column of Z */ +/* > holding the eigenvector associated with W(i). */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1, when N <= 1; */ +/* > otherwise */ +/* > If JOBZ = 'N' and N > 1, LWORK must be queried. */ +/* > LWORK = MAX(1, dimension) where */ +/* > dimension = (2KD+1)*N + KD*NTHREADS */ +/* > where KD is the size of the band. */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, */ +/* > dimension (LRWORK) */ +/* > On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The dimension of array RWORK. */ +/* > If N <= 1, LRWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LRWORK must be at least N. */ +/* > If JOBZ = 'V' and N > 1, LRWORK must be at least */ +/* > 1 + 5*N + 2*N**2. */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of array IWORK. */ +/* > If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. */ +/* > If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, the algorithm failed to converge; i */ +/* > off-diagonal elements of an intermediate tridiagonal */ +/* > form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complex16OTHEReigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > All details about the 2stage techniques are available in: */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int zhbevd_2stage_(char *jobz, char *uplo, integer *n, + integer *kd, doublecomplex *ab, integer *ldab, doublereal *w, + doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, + doublereal *rwork, integer *lrwork, integer *iwork, integer *liwork, + integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, z_dim1, z_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer inde; + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + doublereal anrm; + integer imax; + extern /* Subroutine */ int zhetrd_hb2st_(char *, char *, char *, + integer *, integer *, doublecomplex *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *); + doublereal rmin, rmax; + integer llwk2; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical lsame_(char *, char *); + integer iinfo, indwk, lhtrd; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + integer lwmin; + logical lower; + integer lwtrd, llrwk; + logical wantz; + integer indwk2, ib; + extern doublereal dlamch_(char *); + integer iscale; + doublereal safmin; + extern doublereal zlanhb_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *), zstedc_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublecomplex *, + integer *, doublereal *, integer *, integer *, integer *, integer + *); + integer indrwk, liwmin; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + integer lrwmin, llwork; + doublereal smlnum; + logical lquery; + doublereal eps; + integer indhous; + + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *liwork == -1 || *lrwork == -1; + + *info = 0; + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + } else { + ib = ilaenv2stage_(&c__2, "ZHETRD_HB2ST", jobz, n, kd, &c_n1, &c_n1); + lhtrd = ilaenv2stage_(&c__3, "ZHETRD_HB2ST", jobz, n, kd, &ib, &c_n1); + lwtrd = ilaenv2stage_(&c__4, "ZHETRD_HB2ST", jobz, n, kd, &ib, &c_n1); + if (wantz) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = i__1 * i__1 << 1; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { +/* Computing MAX */ + i__1 = *n, i__2 = lhtrd + lwtrd; + lwmin = f2cmax(i__1,i__2); + lrwmin = *n; + liwmin = 1; + } + } + if (! lsame_(jobz, "N")) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kd < 0) { + *info = -4; + } else if (*ldab < *kd + 1) { + *info = -6; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -9; + } + + if (*info == 0) { + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -11; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -13; + } else if (*liwork < liwmin && ! lquery) { + *info = -15; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHBEVD_2STAGE", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = ab_dim1 + 1; + w[1] = ab[i__1].r; + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_("Safe minimum"); + eps = dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = zlanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + zlascl_("B", kd, kd, &c_b23, &sigma, n, n, &ab[ab_offset], ldab, + info); + } else { + zlascl_("Q", kd, kd, &c_b23, &sigma, n, n, &ab[ab_offset], ldab, + info); + } + } + +/* Call ZHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. */ + + inde = 1; + indrwk = inde + *n; + llrwk = *lrwork - indrwk + 1; + indhous = 1; + indwk = indhous + lhtrd; + llwork = *lwork - indwk + 1; + indwk2 = indwk + *n * *n; + llwk2 = *lwork - indwk2 + 1; + + zhetrd_hb2st_("N", jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], & + rwork[inde], &work[indhous], &lhtrd, &work[indwk], &llwork, & + iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEDC. */ + + if (! wantz) { + dsterf_(n, &w[1], &rwork[inde], info); + } else { + zstedc_("I", n, &w[1], &rwork[inde], &work[1], n, &work[indwk2], & + llwk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info); + zgemm_("N", "N", n, n, n, &c_b2, &z__[z_offset], ldz, &work[1], n, & + c_b1, &work[indwk2], n); + zlacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + return 0; + +/* End of ZHBEVD_2STAGE */ + +} /* zhbevd_2stage__ */ + diff --git a/lapack-netlib/SRC/zhbevx.c b/lapack-netlib/SRC/zhbevx.c new file mode 100644 index 000000000..90e827251 --- /dev/null +++ b/lapack-netlib/SRC/zhbevx.c @@ -0,0 +1,1015 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHBEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER +matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHBEVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL, */ +/* VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK, */ +/* IWORK, IFAIL, INFO ) */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N */ +/* DOUBLE PRECISION ABSTOL, VL, VU */ +/* INTEGER IFAIL( * ), IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 AB( LDAB, * ), Q( LDQ, * ), WORK( * ), */ +/* $ Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHBEVX computes selected eigenvalues and, optionally, eigenvectors */ +/* > of a complex Hermitian band matrix A. Eigenvalues and eigenvectors */ +/* > can be selected by specifying either a range of values or a range of */ +/* > indices for the desired eigenvalues. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all eigenvalues will be found; */ +/* > = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* > will be found; */ +/* > = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX*16 array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > */ +/* > On exit, AB is overwritten by values generated during the */ +/* > reduction to tridiagonal form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD + 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX*16 array, dimension (LDQ, N) */ +/* > If JOBZ = 'V', the N-by-N unitary matrix used in the */ +/* > reduction to tridiagonal form. */ +/* > If JOBZ = 'N', the array Q is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. If JOBZ = 'V', then */ +/* > LDQ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is DOUBLE PRECISION */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is DOUBLE PRECISION */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > smallest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > largest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ABSTOL */ +/* > \verbatim */ +/* > ABSTOL is DOUBLE PRECISION */ +/* > The absolute error tolerance for the eigenvalues. */ +/* > An approximate eigenvalue is accepted as converged */ +/* > when it is determined to lie in an interval [a,b] */ +/* > of width less than or equal to */ +/* > */ +/* > ABSTOL + EPS * f2cmax( |a|,|b| ) , */ +/* > */ +/* > where EPS is the machine precision. If ABSTOL is less than */ +/* > or equal to zero, then EPS*|T| will be used in its place, */ +/* > where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* > by reducing AB to tridiagonal form. */ +/* > */ +/* > Eigenvalues will be computed most accurately when ABSTOL is */ +/* > set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ +/* > If this routine returns with INFO>0, indicating that some */ +/* > eigenvectors did not converge, try setting ABSTOL to */ +/* > 2*DLAMCH('S'). */ +/* > */ +/* > See "Computing Small Singular Values of Bidiagonal Matrices */ +/* > with Guaranteed High Relative Accuracy," by Demmel and */ +/* > Kahan, LAPACK Working Note #3. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The total number of eigenvalues found. 0 <= M <= N. */ +/* > If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > The first M elements contain the selected eigenvalues in */ +/* > ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, f2cmax(1,M)) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* > contain the orthonormal eigenvectors of the matrix A */ +/* > corresponding to the selected eigenvalues, with the i-th */ +/* > column of Z holding the eigenvector associated with W(i). */ +/* > If an eigenvector fails to converge, then that column of Z */ +/* > contains the latest approximation to the eigenvector, and the */ +/* > index of the eigenvector is returned in IFAIL. */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > Note: the user must ensure that at least f2cmax(1,M) columns are */ +/* > supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* > is not known in advance and an upper bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (7*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (5*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IFAIL */ +/* > \verbatim */ +/* > IFAIL is INTEGER array, dimension (N) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* > IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* > indices of the eigenvectors that failed to converge. */ +/* > If JOBZ = 'N', then IFAIL is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, then i eigenvectors failed to converge. */ +/* > Their indices are stored in array IFAIL. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16OTHEReigen */ + +/* ===================================================================== */ +/* Subroutine */ int zhbevx_(char *jobz, char *range, char *uplo, integer *n, + integer *kd, doublecomplex *ab, integer *ldab, doublecomplex *q, + integer *ldq, doublereal *vl, doublereal *vu, integer *il, integer * + iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__, + integer *ldz, doublecomplex *work, doublereal *rwork, integer *iwork, + integer *ifail, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, + i__2; + doublereal d__1, d__2; + + /* Local variables */ + integer indd, inde; + doublereal anrm; + integer imax; + doublereal rmin, rmax; + logical test; + doublecomplex ctmp1; + integer itmp1, i__, j, indee; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical lsame_(char *, char *); + integer iinfo; + char order[1]; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical lower; + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + logical wantz; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zswap_(integer *, doublecomplex *, + integer *, doublecomplex *, integer *); + integer jj; + extern doublereal dlamch_(char *); + logical alleig, indeig; + integer iscale, indibl; + logical valeig; + doublereal safmin; + extern doublereal zlanhb_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal abstll, bignum; + integer indiwk, indisp; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *), dstebz_(char *, char *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *), + zhbtrd_(char *, char *, integer *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublecomplex *, integer *, + doublecomplex *, integer *); + integer indrwk, indwrk; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + integer nsplit; + doublereal smlnum; + extern /* Subroutine */ int zstein_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, doublecomplex *, + integer *, doublereal *, integer *, integer *, integer *), + zsteqr_(char *, integer *, doublereal *, doublereal *, + doublecomplex *, integer *, doublereal *, integer *); + doublereal eps, vll, vuu, tmp1; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + --ifail; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + alleig = lsame_(range, "A"); + valeig = lsame_(range, "V"); + indeig = lsame_(range, "I"); + lower = lsame_(uplo, "L"); + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*kd < 0) { + *info = -5; + } else if (*ldab < *kd + 1) { + *info = -7; + } else if (wantz && *ldq < f2cmax(1,*n)) { + *info = -9; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -11; + } + } else if (indeig) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -12; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -13; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -18; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHBEVX", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + + if (*n == 1) { + *m = 1; + if (lower) { + i__1 = ab_dim1 + 1; + ctmp1.r = ab[i__1].r, ctmp1.i = ab[i__1].i; + } else { + i__1 = *kd + 1 + ab_dim1; + ctmp1.r = ab[i__1].r, ctmp1.i = ab[i__1].i; + } + tmp1 = ctmp1.r; + if (valeig) { + if (! (*vl < tmp1 && *vu >= tmp1)) { + *m = 0; + } + } + if (*m == 1) { + w[1] = ctmp1.r; + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_("Safe minimum"); + eps = dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); + rmax = f2cmin(d__1,d__2); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + abstll = *abstol; + if (valeig) { + vll = *vl; + vuu = *vu; + } else { + vll = 0.; + vuu = 0.; + } + anrm = zlanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]); + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + zlascl_("B", kd, kd, &c_b16, &sigma, n, n, &ab[ab_offset], ldab, + info); + } else { + zlascl_("Q", kd, kd, &c_b16, &sigma, n, n, &ab[ab_offset], ldab, + info); + } + if (*abstol > 0.) { + abstll = *abstol * sigma; + } + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } + +/* Call ZHBTRD to reduce Hermitian band matrix to tridiagonal form. */ + + indd = 1; + inde = indd + *n; + indrwk = inde + *n; + indwrk = 1; + zhbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &rwork[indd], &rwork[ + inde], &q[q_offset], ldq, &work[indwrk], &iinfo); + +/* If all eigenvalues are desired and ABSTOL is less than or equal */ +/* to zero, then call DSTERF or ZSTEQR. If this fails for some */ +/* eigenvalue, then try DSTEBZ. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && *abstol <= 0.) { + dcopy_(n, &rwork[indd], &c__1, &w[1], &c__1); + indee = indrwk + (*n << 1); + if (! wantz) { + i__1 = *n - 1; + dcopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + dsterf_(n, &w[1], &rwork[indee], info); + } else { + zlacpy_("A", n, n, &q[q_offset], ldq, &z__[z_offset], ldz); + i__1 = *n - 1; + dcopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + zsteqr_(jobz, n, &w[1], &rwork[indee], &z__[z_offset], ldz, & + rwork[indrwk], info); + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ifail[i__] = 0; +/* L10: */ + } + } + } + if (*info == 0) { + *m = *n; + goto L30; + } + *info = 0; + } + +/* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + indibl = 1; + indisp = indibl + *n; + indiwk = indisp + *n; + dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &rwork[indd], & + rwork[inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], & + rwork[indrwk], &iwork[indiwk], info); + + if (wantz) { + zstein_(n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], & + iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[ + indiwk], &ifail[1], info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by ZSTEIN. */ + + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + zcopy_(n, &z__[j * z_dim1 + 1], &c__1, &work[1], &c__1); + zgemv_("N", n, n, &c_b2, &q[q_offset], ldq, &work[1], &c__1, & + c_b1, &z__[j * z_dim1 + 1], &c__1); +/* L20: */ + } + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +L30: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L40: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + if (*info != 0) { + itmp1 = ifail[i__]; + ifail[i__] = ifail[j]; + ifail[j] = itmp1; + } + } +/* L50: */ + } + } + + return 0; + +/* End of ZHBEVX */ + +} /* zhbevx_ */ + diff --git a/lapack-netlib/SRC/zhbevx_2stage.c b/lapack-netlib/SRC/zhbevx_2stage.c new file mode 100644 index 000000000..73ee89c4a --- /dev/null +++ b/lapack-netlib/SRC/zhbevx_2stage.c @@ -0,0 +1,1121 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHBEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for + OTHER matrices */ + +/* @precisions fortran z -> s d c */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHBEVX_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, */ +/* Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, */ +/* Z, LDZ, WORK, LWORK, RWORK, IWORK, */ +/* IFAIL, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK */ +/* DOUBLE PRECISION ABSTOL, VL, VU */ +/* INTEGER IFAIL( * ), IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 AB( LDAB, * ), Q( LDQ, * ), WORK( * ), */ +/* $ Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHBEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors */ +/* > of a complex Hermitian band matrix A using the 2stage technique for */ +/* > the reduction to tridiagonal. Eigenvalues and eigenvectors */ +/* > can be selected by specifying either a range of values or a range of */ +/* > indices for the desired eigenvalues. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > Not available in this release. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all eigenvalues will be found; */ +/* > = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* > will be found; */ +/* > = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX*16 array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > */ +/* > On exit, AB is overwritten by values generated during the */ +/* > reduction to tridiagonal form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD + 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX*16 array, dimension (LDQ, N) */ +/* > If JOBZ = 'V', the N-by-N unitary matrix used in the */ +/* > reduction to tridiagonal form. */ +/* > If JOBZ = 'N', the array Q is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. If JOBZ = 'V', then */ +/* > LDQ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is DOUBLE PRECISION */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is DOUBLE PRECISION */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > smallest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > largest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ABSTOL */ +/* > \verbatim */ +/* > ABSTOL is DOUBLE PRECISION */ +/* > The absolute error tolerance for the eigenvalues. */ +/* > An approximate eigenvalue is accepted as converged */ +/* > when it is determined to lie in an interval [a,b] */ +/* > of width less than or equal to */ +/* > */ +/* > ABSTOL + EPS * f2cmax( |a|,|b| ) , */ +/* > */ +/* > where EPS is the machine precision. If ABSTOL is less than */ +/* > or equal to zero, then EPS*|T| will be used in its place, */ +/* > where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* > by reducing AB to tridiagonal form. */ +/* > */ +/* > Eigenvalues will be computed most accurately when ABSTOL is */ +/* > set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ +/* > If this routine returns with INFO>0, indicating that some */ +/* > eigenvectors did not converge, try setting ABSTOL to */ +/* > 2*DLAMCH('S'). */ +/* > */ +/* > See "Computing Small Singular Values of Bidiagonal Matrices */ +/* > with Guaranteed High Relative Accuracy," by Demmel and */ +/* > Kahan, LAPACK Working Note #3. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The total number of eigenvalues found. 0 <= M <= N. */ +/* > If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > The first M elements contain the selected eigenvalues in */ +/* > ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, f2cmax(1,M)) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* > contain the orthonormal eigenvectors of the matrix A */ +/* > corresponding to the selected eigenvalues, with the i-th */ +/* > column of Z holding the eigenvector associated with W(i). */ +/* > If an eigenvector fails to converge, then that column of Z */ +/* > contains the latest approximation to the eigenvector, and the */ +/* > index of the eigenvector is returned in IFAIL. */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > Note: the user must ensure that at least f2cmax(1,M) columns are */ +/* > supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* > is not known in advance and an upper bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1, when N <= 1; */ +/* > otherwise */ +/* > If JOBZ = 'N' and N > 1, LWORK must be queried. */ +/* > LWORK = MAX(1, dimension) where */ +/* > dimension = (2KD+1)*N + KD*NTHREADS */ +/* > where KD is the size of the band. */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (7*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (5*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IFAIL */ +/* > \verbatim */ +/* > IFAIL is INTEGER array, dimension (N) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* > IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* > indices of the eigenvectors that failed to converge. */ +/* > If JOBZ = 'N', then IFAIL is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, then i eigenvectors failed to converge. */ +/* > Their indices are stored in array IFAIL. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16OTHEReigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > All details about the 2stage techniques are available in: */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int zhbevx_2stage_(char *jobz, char *range, char *uplo, + integer *n, integer *kd, doublecomplex *ab, integer *ldab, + doublecomplex *q, integer *ldq, doublereal *vl, doublereal *vu, + integer *il, integer *iu, doublereal *abstol, integer *m, doublereal * + w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer * + lwork, doublereal *rwork, integer *iwork, integer *ifail, integer * + info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, + i__2; + doublereal d__1, d__2; + + /* Local variables */ + integer indd, inde; + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + doublereal anrm; + integer imax; + extern /* Subroutine */ int zhetrd_hb2st_(char *, char *, char *, + integer *, integer *, doublecomplex *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *); + doublereal rmin, rmax; + logical test; + doublecomplex ctmp1; + integer itmp1, i__, j, indee; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical lsame_(char *, char *); + integer iinfo; + char order[1]; + integer lhtrd; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer lwmin; + logical lower; + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + integer lwtrd; + logical wantz; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zswap_(integer *, doublecomplex *, + integer *, doublecomplex *, integer *); + integer ib, jj; + extern doublereal dlamch_(char *); + logical alleig, indeig; + integer iscale, indibl; + logical valeig; + doublereal safmin; + extern doublereal zlanhb_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal abstll, bignum; + integer indiwk, indisp; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *), dstebz_(char *, char *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *); + integer indrwk, indwrk; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + integer nsplit, llwork; + doublereal smlnum; + extern /* Subroutine */ int zstein_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, doublecomplex *, + integer *, doublereal *, integer *, integer *, integer *); + logical lquery; + extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *); + doublereal eps, vll, vuu; + integer indhous; + doublereal tmp1; + + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + --ifail; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + alleig = lsame_(range, "A"); + valeig = lsame_(range, "V"); + indeig = lsame_(range, "I"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1; + + *info = 0; + if (! lsame_(jobz, "N")) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*kd < 0) { + *info = -5; + } else if (*ldab < *kd + 1) { + *info = -7; + } else if (wantz && *ldq < f2cmax(1,*n)) { + *info = -9; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -11; + } + } else if (indeig) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -12; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -13; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -18; + } + } + + if (*info == 0) { + if (*n <= 1) { + lwmin = 1; + work[1].r = (doublereal) lwmin, work[1].i = 0.; + } else { + ib = ilaenv2stage_(&c__2, "ZHETRD_HB2ST", jobz, n, kd, &c_n1, & + c_n1); + lhtrd = ilaenv2stage_(&c__3, "ZHETRD_HB2ST", jobz, n, kd, &ib, & + c_n1); + lwtrd = ilaenv2stage_(&c__4, "ZHETRD_HB2ST", jobz, n, kd, &ib, & + c_n1); + lwmin = lhtrd + lwtrd; + work[1].r = (doublereal) lwmin, work[1].i = 0.; + } + + if (*lwork < lwmin && ! lquery) { + *info = -20; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHBEVX_2STAGE", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + + if (*n == 1) { + *m = 1; + if (lower) { + i__1 = ab_dim1 + 1; + ctmp1.r = ab[i__1].r, ctmp1.i = ab[i__1].i; + } else { + i__1 = *kd + 1 + ab_dim1; + ctmp1.r = ab[i__1].r, ctmp1.i = ab[i__1].i; + } + tmp1 = ctmp1.r; + if (valeig) { + if (! (*vl < tmp1 && *vu >= tmp1)) { + *m = 0; + } + } + if (*m == 1) { + w[1] = ctmp1.r; + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_("Safe minimum"); + eps = dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); + rmax = f2cmin(d__1,d__2); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + abstll = *abstol; + if (valeig) { + vll = *vl; + vuu = *vu; + } else { + vll = 0.; + vuu = 0.; + } + anrm = zlanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]); + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + zlascl_("B", kd, kd, &c_b26, &sigma, n, n, &ab[ab_offset], ldab, + info); + } else { + zlascl_("Q", kd, kd, &c_b26, &sigma, n, n, &ab[ab_offset], ldab, + info); + } + if (*abstol > 0.) { + abstll = *abstol * sigma; + } + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } + +/* Call ZHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. */ + + indd = 1; + inde = indd + *n; + indrwk = inde + *n; + + indhous = 1; + indwrk = indhous + lhtrd; + llwork = *lwork - indwrk + 1; + + zhetrd_hb2st_("N", jobz, uplo, n, kd, &ab[ab_offset], ldab, &rwork[indd], + &rwork[inde], &work[indhous], &lhtrd, &work[indwrk], &llwork, & + iinfo); + +/* If all eigenvalues are desired and ABSTOL is less than or equal */ +/* to zero, then call DSTERF or ZSTEQR. If this fails for some */ +/* eigenvalue, then try DSTEBZ. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && *abstol <= 0.) { + dcopy_(n, &rwork[indd], &c__1, &w[1], &c__1); + indee = indrwk + (*n << 1); + if (! wantz) { + i__1 = *n - 1; + dcopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + dsterf_(n, &w[1], &rwork[indee], info); + } else { + zlacpy_("A", n, n, &q[q_offset], ldq, &z__[z_offset], ldz); + i__1 = *n - 1; + dcopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + zsteqr_(jobz, n, &w[1], &rwork[indee], &z__[z_offset], ldz, & + rwork[indrwk], info); + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ifail[i__] = 0; +/* L10: */ + } + } + } + if (*info == 0) { + *m = *n; + goto L30; + } + *info = 0; + } + +/* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + indibl = 1; + indisp = indibl + *n; + indiwk = indisp + *n; + dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &rwork[indd], & + rwork[inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], & + rwork[indrwk], &iwork[indiwk], info); + + if (wantz) { + zstein_(n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], & + iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[ + indiwk], &ifail[1], info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by ZSTEIN. */ + + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + zcopy_(n, &z__[j * z_dim1 + 1], &c__1, &work[1], &c__1); + zgemv_("N", n, n, &c_b2, &q[q_offset], ldq, &work[1], &c__1, & + c_b1, &z__[j * z_dim1 + 1], &c__1); +/* L20: */ + } + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +L30: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L40: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + if (*info != 0) { + itmp1 = ifail[i__]; + ifail[i__] = ifail[j]; + ifail[j] = itmp1; + } + } +/* L50: */ + } + } + +/* Set WORK(1) to optimal workspace size. */ + + work[1].r = (doublereal) lwmin, work[1].i = 0.; + + return 0; + +/* End of ZHBEVX_2STAGE */ + +} /* zhbevx_2stage__ */ + diff --git a/lapack-netlib/SRC/zhbgst.c b/lapack-netlib/SRC/zhbgst.c new file mode 100644 index 000000000..a2daaaa4e --- /dev/null +++ b/lapack-netlib/SRC/zhbgst.c @@ -0,0 +1,2606 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHBGST */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHBGST + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X, */ +/* LDX, WORK, RWORK, INFO ) */ + +/* CHARACTER UPLO, VECT */ +/* INTEGER INFO, KA, KB, LDAB, LDBB, LDX, N */ +/* DOUBLE PRECISION RWORK( * ) */ +/* COMPLEX*16 AB( LDAB, * ), BB( LDBB, * ), WORK( * ), */ +/* $ X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHBGST reduces a complex Hermitian-definite banded generalized */ +/* > eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, */ +/* > such that C has the same bandwidth as A. */ +/* > */ +/* > B must have been previously factorized as S**H*S by ZPBSTF, using a */ +/* > split Cholesky factorization. A is overwritten by C = X**H*A*X, where */ +/* > X = S**(-1)*Q and Q is a unitary matrix chosen to preserve the */ +/* > bandwidth of A. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] VECT */ +/* > \verbatim */ +/* > VECT is CHARACTER*1 */ +/* > = 'N': do not form the transformation matrix X; */ +/* > = 'V': form X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KA */ +/* > \verbatim */ +/* > KA is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KA >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KB */ +/* > \verbatim */ +/* > KB is INTEGER */ +/* > The number of superdiagonals of the matrix B if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KA >= KB >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX*16 array, dimension (LDAB,N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first ka+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for f2cmax(1,j-ka)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+ka). */ +/* > */ +/* > On exit, the transformed matrix X**H*A*X, stored in the same */ +/* > format as A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KA+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BB */ +/* > \verbatim */ +/* > BB is COMPLEX*16 array, dimension (LDBB,N) */ +/* > The banded factor S from the split Cholesky factorization of */ +/* > B, as returned by ZPBSTF, stored in the first kb+1 rows of */ +/* > the array. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDBB */ +/* > \verbatim */ +/* > LDBB is INTEGER */ +/* > The leading dimension of the array BB. LDBB >= KB+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension (LDX,N) */ +/* > If VECT = 'V', the n-by-n matrix X. */ +/* > If VECT = 'N', the array X is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. */ +/* > LDX >= f2cmax(1,N) if VECT = 'V'; LDX >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zhbgst_(char *vect, char *uplo, integer *n, integer *ka, + integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, + integer *ldbb, doublecomplex *x, integer *ldx, doublecomplex *work, + doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, bb_dim1, bb_offset, x_dim1, x_offset, i__1, + i__2, i__3, i__4, i__5, i__6, i__7, i__8; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6, z__7, z__8, z__9, z__10; + + /* Local variables */ + integer inca; + extern /* Subroutine */ int zrot_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *); + integer i__, j, k, l, m; + doublecomplex t; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + integer i0, i1; + logical upper; + integer i2, j1, j2; + extern /* Subroutine */ int zgeru_(integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + logical wantx; + extern /* Subroutine */ int zlar2v_(integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *, doublereal *, + doublecomplex *, integer *); + doublecomplex ra; + integer nr, nx; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + logical update; + extern /* Subroutine */ int zlacgv_(integer *, doublecomplex *, integer *) + ; + integer ka1, kb1; + extern /* Subroutine */ int zlaset_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *), zlartg_(doublecomplex *, doublecomplex *, doublereal *, + doublecomplex *, doublecomplex *); + doublecomplex ra1; + extern /* Subroutine */ int zlargv_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, integer *); + integer j1t, j2t; + extern /* Subroutine */ int zlartv_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *, + integer *); + doublereal bii; + integer kbt, nrt; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + bb_dim1 = *ldbb; + bb_offset = 1 + bb_dim1 * 1; + bb -= bb_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --work; + --rwork; + + /* Function Body */ + wantx = lsame_(vect, "V"); + upper = lsame_(uplo, "U"); + ka1 = *ka + 1; + kb1 = *kb + 1; + *info = 0; + if (! wantx && ! lsame_(vect, "N")) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ka < 0) { + *info = -4; + } else if (*kb < 0 || *kb > *ka) { + *info = -5; + } else if (*ldab < *ka + 1) { + *info = -7; + } else if (*ldbb < *kb + 1) { + *info = -9; + } else if (*ldx < 1 || wantx && *ldx < f2cmax(1,*n)) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHBGST", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + inca = *ldab * ka1; + +/* Initialize X to the unit matrix, if needed */ + + if (wantx) { + zlaset_("Full", n, n, &c_b1, &c_b2, &x[x_offset], ldx); + } + +/* Set M to the splitting point m. It must be the same value as is */ +/* used in ZPBSTF. The chosen value allows the arrays WORK and RWORK */ +/* to be of dimension (N). */ + + m = (*n + *kb) / 2; + +/* The routine works in two phases, corresponding to the two halves */ +/* of the split Cholesky factorization of B as S**H*S where */ + +/* S = ( U ) */ +/* ( M L ) */ + +/* with U upper triangular of order m, and L lower triangular of */ +/* order n-m. S has the same bandwidth as B. */ + +/* S is treated as a product of elementary matrices: */ + +/* S = S(m)*S(m-1)*...*S(2)*S(1)*S(m+1)*S(m+2)*...*S(n-1)*S(n) */ + +/* where S(i) is determined by the i-th row of S. */ + +/* In phase 1, the index i takes the values n, n-1, ... , m+1; */ +/* in phase 2, it takes the values 1, 2, ... , m. */ + +/* For each value of i, the current matrix A is updated by forming */ +/* inv(S(i))**H*A*inv(S(i)). This creates a triangular bulge outside */ +/* the band of A. The bulge is then pushed down toward the bottom of */ +/* A in phase 1, and up toward the top of A in phase 2, by applying */ +/* plane rotations. */ + +/* There are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1 */ +/* of them are linearly independent, so annihilating a bulge requires */ +/* only 2*kb-1 plane rotations. The rotations are divided into a 1st */ +/* set of kb-1 rotations, and a 2nd set of kb rotations. */ + +/* Wherever possible, rotations are generated and applied in vector */ +/* operations of length NR between the indices J1 and J2 (sometimes */ +/* replaced by modified values NRT, J1T or J2T). */ + +/* The real cosines and complex sines of the rotations are stored in */ +/* the arrays RWORK and WORK, those of the 1st set in elements */ +/* 2:m-kb-1, and those of the 2nd set in elements m-kb+1:n. */ + +/* The bulges are not formed explicitly; nonzero elements outside the */ +/* band are created only when they are required for generating new */ +/* rotations; they are stored in the array WORK, in positions where */ +/* they are later overwritten by the sines of the rotations which */ +/* annihilate them. */ + +/* **************************** Phase 1 ***************************** */ + +/* The logical structure of this phase is: */ + +/* UPDATE = .TRUE. */ +/* DO I = N, M + 1, -1 */ +/* use S(i) to update A and create a new bulge */ +/* apply rotations to push all bulges KA positions downward */ +/* END DO */ +/* UPDATE = .FALSE. */ +/* DO I = M + KA + 1, N - 1 */ +/* apply rotations to push all bulges KA positions downward */ +/* END DO */ + +/* To avoid duplicating code, the two loops are merged. */ + + update = TRUE_; + i__ = *n + 1; +L10: + if (update) { + --i__; +/* Computing MIN */ + i__1 = *kb, i__2 = i__ - 1; + kbt = f2cmin(i__1,i__2); + i0 = i__ - 1; +/* Computing MIN */ + i__1 = *n, i__2 = i__ + *ka; + i1 = f2cmin(i__1,i__2); + i2 = i__ - kbt + ka1; + if (i__ < m + 1) { + update = FALSE_; + ++i__; + i0 = m; + if (*ka == 0) { + goto L480; + } + goto L10; + } + } else { + i__ += *ka; + if (i__ > *n - 1) { + goto L480; + } + } + + if (upper) { + +/* Transform A, working with the upper triangle */ + + if (update) { + +/* Form inv(S(i))**H * A * inv(S(i)) */ + + i__1 = kb1 + i__ * bb_dim1; + bii = bb[i__1].r; + i__1 = ka1 + i__ * ab_dim1; + i__2 = ka1 + i__ * ab_dim1; + d__1 = ab[i__2].r / bii / bii; + ab[i__1].r = d__1, ab[i__1].i = 0.; + i__1 = i1; + for (j = i__ + 1; j <= i__1; ++j) { + i__2 = i__ - j + ka1 + j * ab_dim1; + i__3 = i__ - j + ka1 + j * ab_dim1; + z__1.r = ab[i__3].r / bii, z__1.i = ab[i__3].i / bii; + ab[i__2].r = z__1.r, ab[i__2].i = z__1.i; +/* L20: */ + } +/* Computing MAX */ + i__1 = 1, i__2 = i__ - *ka; + i__3 = i__ - 1; + for (j = f2cmax(i__1,i__2); j <= i__3; ++j) { + i__1 = j - i__ + ka1 + i__ * ab_dim1; + i__2 = j - i__ + ka1 + i__ * ab_dim1; + z__1.r = ab[i__2].r / bii, z__1.i = ab[i__2].i / bii; + ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; +/* L30: */ + } + i__3 = i__ - 1; + for (k = i__ - kbt; k <= i__3; ++k) { + i__1 = k; + for (j = i__ - kbt; j <= i__1; ++j) { + i__2 = j - k + ka1 + k * ab_dim1; + i__4 = j - k + ka1 + k * ab_dim1; + i__5 = j - i__ + kb1 + i__ * bb_dim1; + d_cnjg(&z__5, &ab[k - i__ + ka1 + i__ * ab_dim1]); + z__4.r = bb[i__5].r * z__5.r - bb[i__5].i * z__5.i, + z__4.i = bb[i__5].r * z__5.i + bb[i__5].i * + z__5.r; + z__3.r = ab[i__4].r - z__4.r, z__3.i = ab[i__4].i - + z__4.i; + d_cnjg(&z__7, &bb[k - i__ + kb1 + i__ * bb_dim1]); + i__6 = j - i__ + ka1 + i__ * ab_dim1; + z__6.r = z__7.r * ab[i__6].r - z__7.i * ab[i__6].i, + z__6.i = z__7.r * ab[i__6].i + z__7.i * ab[i__6] + .r; + z__2.r = z__3.r - z__6.r, z__2.i = z__3.i - z__6.i; + i__7 = ka1 + i__ * ab_dim1; + d__1 = ab[i__7].r; + i__8 = j - i__ + kb1 + i__ * bb_dim1; + z__9.r = d__1 * bb[i__8].r, z__9.i = d__1 * bb[i__8].i; + d_cnjg(&z__10, &bb[k - i__ + kb1 + i__ * bb_dim1]); + z__8.r = z__9.r * z__10.r - z__9.i * z__10.i, z__8.i = + z__9.r * z__10.i + z__9.i * z__10.r; + z__1.r = z__2.r + z__8.r, z__1.i = z__2.i + z__8.i; + ab[i__2].r = z__1.r, ab[i__2].i = z__1.i; +/* L40: */ + } +/* Computing MAX */ + i__1 = 1, i__2 = i__ - *ka; + i__4 = i__ - kbt - 1; + for (j = f2cmax(i__1,i__2); j <= i__4; ++j) { + i__1 = j - k + ka1 + k * ab_dim1; + i__2 = j - k + ka1 + k * ab_dim1; + d_cnjg(&z__3, &bb[k - i__ + kb1 + i__ * bb_dim1]); + i__5 = j - i__ + ka1 + i__ * ab_dim1; + z__2.r = z__3.r * ab[i__5].r - z__3.i * ab[i__5].i, + z__2.i = z__3.r * ab[i__5].i + z__3.i * ab[i__5] + .r; + z__1.r = ab[i__2].r - z__2.r, z__1.i = ab[i__2].i - + z__2.i; + ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; +/* L50: */ + } +/* L60: */ + } + i__3 = i1; + for (j = i__; j <= i__3; ++j) { +/* Computing MAX */ + i__4 = j - *ka, i__1 = i__ - kbt; + i__2 = i__ - 1; + for (k = f2cmax(i__4,i__1); k <= i__2; ++k) { + i__4 = k - j + ka1 + j * ab_dim1; + i__1 = k - j + ka1 + j * ab_dim1; + i__5 = k - i__ + kb1 + i__ * bb_dim1; + i__6 = i__ - j + ka1 + j * ab_dim1; + z__2.r = bb[i__5].r * ab[i__6].r - bb[i__5].i * ab[i__6] + .i, z__2.i = bb[i__5].r * ab[i__6].i + bb[i__5].i + * ab[i__6].r; + z__1.r = ab[i__1].r - z__2.r, z__1.i = ab[i__1].i - + z__2.i; + ab[i__4].r = z__1.r, ab[i__4].i = z__1.i; +/* L70: */ + } +/* L80: */ + } + + if (wantx) { + +/* post-multiply X by inv(S(i)) */ + + i__3 = *n - m; + d__1 = 1. / bii; + zdscal_(&i__3, &d__1, &x[m + 1 + i__ * x_dim1], &c__1); + if (kbt > 0) { + i__3 = *n - m; + z__1.r = -1., z__1.i = 0.; + zgerc_(&i__3, &kbt, &z__1, &x[m + 1 + i__ * x_dim1], & + c__1, &bb[kb1 - kbt + i__ * bb_dim1], &c__1, &x[m + + 1 + (i__ - kbt) * x_dim1], ldx); + } + } + +/* store a(i,i1) in RA1 for use in next loop over K */ + + i__3 = i__ - i1 + ka1 + i1 * ab_dim1; + ra1.r = ab[i__3].r, ra1.i = ab[i__3].i; + } + +/* Generate and apply vectors of rotations to chase all the */ +/* existing bulges KA positions down toward the bottom of the */ +/* band */ + + i__3 = *kb - 1; + for (k = 1; k <= i__3; ++k) { + if (update) { + +/* Determine the rotations which would annihilate the bulge */ +/* which has in theory just been created */ + + if (i__ - k + *ka < *n && i__ - k > 1) { + +/* generate rotation to annihilate a(i,i-k+ka+1) */ + + zlartg_(&ab[k + 1 + (i__ - k + *ka) * ab_dim1], &ra1, & + rwork[i__ - k + *ka - m], &work[i__ - k + *ka - m] + , &ra); + +/* create nonzero element a(i-k,i-k+ka+1) outside the */ +/* band and store it in WORK(i-k) */ + + i__2 = kb1 - k + i__ * bb_dim1; + z__2.r = -bb[i__2].r, z__2.i = -bb[i__2].i; + z__1.r = z__2.r * ra1.r - z__2.i * ra1.i, z__1.i = z__2.r + * ra1.i + z__2.i * ra1.r; + t.r = z__1.r, t.i = z__1.i; + i__2 = i__ - k; + i__4 = i__ - k + *ka - m; + z__2.r = rwork[i__4] * t.r, z__2.i = rwork[i__4] * t.i; + d_cnjg(&z__4, &work[i__ - k + *ka - m]); + i__1 = (i__ - k + *ka) * ab_dim1 + 1; + z__3.r = z__4.r * ab[i__1].r - z__4.i * ab[i__1].i, + z__3.i = z__4.r * ab[i__1].i + z__4.i * ab[i__1] + .r; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + work[i__2].r = z__1.r, work[i__2].i = z__1.i; + i__2 = (i__ - k + *ka) * ab_dim1 + 1; + i__4 = i__ - k + *ka - m; + z__2.r = work[i__4].r * t.r - work[i__4].i * t.i, z__2.i = + work[i__4].r * t.i + work[i__4].i * t.r; + i__1 = i__ - k + *ka - m; + i__5 = (i__ - k + *ka) * ab_dim1 + 1; + z__3.r = rwork[i__1] * ab[i__5].r, z__3.i = rwork[i__1] * + ab[i__5].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + ab[i__2].r = z__1.r, ab[i__2].i = z__1.i; + ra1.r = ra.r, ra1.i = ra.i; + } + } +/* Computing MAX */ + i__2 = 1, i__4 = k - i0 + 2; + j2 = i__ - k - 1 + f2cmax(i__2,i__4) * ka1; + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + if (update) { +/* Computing MAX */ + i__2 = j2, i__4 = i__ + (*ka << 1) - k + 1; + j2t = f2cmax(i__2,i__4); + } else { + j2t = j2; + } + nrt = (*n - j2t + *ka) / ka1; + i__2 = j1; + i__4 = ka1; + for (j = j2t; i__4 < 0 ? j >= i__2 : j <= i__2; j += i__4) { + +/* create nonzero element a(j-ka,j+1) outside the band */ +/* and store it in WORK(j-m) */ + + i__1 = j - m; + i__5 = j - m; + i__6 = (j + 1) * ab_dim1 + 1; + z__1.r = work[i__5].r * ab[i__6].r - work[i__5].i * ab[i__6] + .i, z__1.i = work[i__5].r * ab[i__6].i + work[i__5].i + * ab[i__6].r; + work[i__1].r = z__1.r, work[i__1].i = z__1.i; + i__1 = (j + 1) * ab_dim1 + 1; + i__5 = j - m; + i__6 = (j + 1) * ab_dim1 + 1; + z__1.r = rwork[i__5] * ab[i__6].r, z__1.i = rwork[i__5] * ab[ + i__6].i; + ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; +/* L90: */ + } + +/* generate rotations in 1st set to annihilate elements which */ +/* have been created outside the band */ + + if (nrt > 0) { + zlargv_(&nrt, &ab[j2t * ab_dim1 + 1], &inca, &work[j2t - m], & + ka1, &rwork[j2t - m], &ka1); + } + if (nr > 0) { + +/* apply rotations in 1st set from the right */ + + i__4 = *ka - 1; + for (l = 1; l <= i__4; ++l) { + zlartv_(&nr, &ab[ka1 - l + j2 * ab_dim1], &inca, &ab[*ka + - l + (j2 + 1) * ab_dim1], &inca, &rwork[j2 - m], + &work[j2 - m], &ka1); +/* L100: */ + } + +/* apply rotations in 1st set from both sides to diagonal */ +/* blocks */ + + zlar2v_(&nr, &ab[ka1 + j2 * ab_dim1], &ab[ka1 + (j2 + 1) * + ab_dim1], &ab[*ka + (j2 + 1) * ab_dim1], &inca, & + rwork[j2 - m], &work[j2 - m], &ka1); + + zlacgv_(&nr, &work[j2 - m], &ka1); + } + +/* start applying rotations in 1st set from the left */ + + i__4 = *kb - k + 1; + for (l = *ka - 1; l >= i__4; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + zlartv_(&nrt, &ab[l + (j2 + ka1 - l) * ab_dim1], &inca, & + ab[l + 1 + (j2 + ka1 - l) * ab_dim1], &inca, & + rwork[j2 - m], &work[j2 - m], &ka1); + } +/* L110: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 1st set */ + + i__4 = j1; + i__2 = ka1; + for (j = j2; i__2 < 0 ? j >= i__4 : j <= i__4; j += i__2) { + i__1 = *n - m; + d_cnjg(&z__1, &work[j - m]); + zrot_(&i__1, &x[m + 1 + j * x_dim1], &c__1, &x[m + 1 + (j + + 1) * x_dim1], &c__1, &rwork[j - m], &z__1); +/* L120: */ + } + } +/* L130: */ + } + + if (update) { + if (i2 <= *n && kbt > 0) { + +/* create nonzero element a(i-kbt,i-kbt+ka+1) outside the */ +/* band and store it in WORK(i-kbt) */ + + i__3 = i__ - kbt; + i__2 = kb1 - kbt + i__ * bb_dim1; + z__2.r = -bb[i__2].r, z__2.i = -bb[i__2].i; + z__1.r = z__2.r * ra1.r - z__2.i * ra1.i, z__1.i = z__2.r * + ra1.i + z__2.i * ra1.r; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; + } + } + + for (k = *kb; k >= 1; --k) { + if (update) { +/* Computing MAX */ + i__3 = 2, i__2 = k - i0 + 1; + j2 = i__ - k - 1 + f2cmax(i__3,i__2) * ka1; + } else { +/* Computing MAX */ + i__3 = 1, i__2 = k - i0 + 1; + j2 = i__ - k - 1 + f2cmax(i__3,i__2) * ka1; + } + +/* finish applying rotations in 2nd set from the left */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (*n - j2 + *ka + l) / ka1; + if (nrt > 0) { + zlartv_(&nrt, &ab[l + (j2 - l + 1) * ab_dim1], &inca, &ab[ + l + 1 + (j2 - l + 1) * ab_dim1], &inca, &rwork[j2 + - *ka], &work[j2 - *ka], &ka1); + } +/* L140: */ + } + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + i__3 = j2; + i__2 = -ka1; + for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j += i__2) { + i__4 = j; + i__1 = j - *ka; + work[i__4].r = work[i__1].r, work[i__4].i = work[i__1].i; + rwork[j] = rwork[j - *ka]; +/* L150: */ + } + i__2 = j1; + i__3 = ka1; + for (j = j2; i__3 < 0 ? j >= i__2 : j <= i__2; j += i__3) { + +/* create nonzero element a(j-ka,j+1) outside the band */ +/* and store it in WORK(j) */ + + i__4 = j; + i__1 = j; + i__5 = (j + 1) * ab_dim1 + 1; + z__1.r = work[i__1].r * ab[i__5].r - work[i__1].i * ab[i__5] + .i, z__1.i = work[i__1].r * ab[i__5].i + work[i__1].i + * ab[i__5].r; + work[i__4].r = z__1.r, work[i__4].i = z__1.i; + i__4 = (j + 1) * ab_dim1 + 1; + i__1 = j; + i__5 = (j + 1) * ab_dim1 + 1; + z__1.r = rwork[i__1] * ab[i__5].r, z__1.i = rwork[i__1] * ab[ + i__5].i; + ab[i__4].r = z__1.r, ab[i__4].i = z__1.i; +/* L160: */ + } + if (update) { + if (i__ - k < *n - *ka && k <= kbt) { + i__3 = i__ - k + *ka; + i__2 = i__ - k; + work[i__3].r = work[i__2].r, work[i__3].i = work[i__2].i; + } + } +/* L170: */ + } + + for (k = *kb; k >= 1; --k) { +/* Computing MAX */ + i__3 = 1, i__2 = k - i0 + 1; + j2 = i__ - k - 1 + f2cmax(i__3,i__2) * ka1; + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + if (nr > 0) { + +/* generate rotations in 2nd set to annihilate elements */ +/* which have been created outside the band */ + + zlargv_(&nr, &ab[j2 * ab_dim1 + 1], &inca, &work[j2], &ka1, & + rwork[j2], &ka1); + +/* apply rotations in 2nd set from the right */ + + i__3 = *ka - 1; + for (l = 1; l <= i__3; ++l) { + zlartv_(&nr, &ab[ka1 - l + j2 * ab_dim1], &inca, &ab[*ka + - l + (j2 + 1) * ab_dim1], &inca, &rwork[j2], & + work[j2], &ka1); +/* L180: */ + } + +/* apply rotations in 2nd set from both sides to diagonal */ +/* blocks */ + + zlar2v_(&nr, &ab[ka1 + j2 * ab_dim1], &ab[ka1 + (j2 + 1) * + ab_dim1], &ab[*ka + (j2 + 1) * ab_dim1], &inca, & + rwork[j2], &work[j2], &ka1); + + zlacgv_(&nr, &work[j2], &ka1); + } + +/* start applying rotations in 2nd set from the left */ + + i__3 = *kb - k + 1; + for (l = *ka - 1; l >= i__3; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + zlartv_(&nrt, &ab[l + (j2 + ka1 - l) * ab_dim1], &inca, & + ab[l + 1 + (j2 + ka1 - l) * ab_dim1], &inca, & + rwork[j2], &work[j2], &ka1); + } +/* L190: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 2nd set */ + + i__3 = j1; + i__2 = ka1; + for (j = j2; i__2 < 0 ? j >= i__3 : j <= i__3; j += i__2) { + i__4 = *n - m; + d_cnjg(&z__1, &work[j]); + zrot_(&i__4, &x[m + 1 + j * x_dim1], &c__1, &x[m + 1 + (j + + 1) * x_dim1], &c__1, &rwork[j], &z__1); +/* L200: */ + } + } +/* L210: */ + } + + i__2 = *kb - 1; + for (k = 1; k <= i__2; ++k) { +/* Computing MAX */ + i__3 = 1, i__4 = k - i0 + 2; + j2 = i__ - k - 1 + f2cmax(i__3,i__4) * ka1; + +/* finish applying rotations in 1st set from the left */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + zlartv_(&nrt, &ab[l + (j2 + ka1 - l) * ab_dim1], &inca, & + ab[l + 1 + (j2 + ka1 - l) * ab_dim1], &inca, & + rwork[j2 - m], &work[j2 - m], &ka1); + } +/* L220: */ + } +/* L230: */ + } + + if (*kb > 1) { + i__2 = j2 + *ka; + for (j = *n - 1; j >= i__2; --j) { + rwork[j - m] = rwork[j - *ka - m]; + i__3 = j - m; + i__4 = j - *ka - m; + work[i__3].r = work[i__4].r, work[i__3].i = work[i__4].i; +/* L240: */ + } + } + + } else { + +/* Transform A, working with the lower triangle */ + + if (update) { + +/* Form inv(S(i))**H * A * inv(S(i)) */ + + i__2 = i__ * bb_dim1 + 1; + bii = bb[i__2].r; + i__2 = i__ * ab_dim1 + 1; + i__3 = i__ * ab_dim1 + 1; + d__1 = ab[i__3].r / bii / bii; + ab[i__2].r = d__1, ab[i__2].i = 0.; + i__2 = i1; + for (j = i__ + 1; j <= i__2; ++j) { + i__3 = j - i__ + 1 + i__ * ab_dim1; + i__4 = j - i__ + 1 + i__ * ab_dim1; + z__1.r = ab[i__4].r / bii, z__1.i = ab[i__4].i / bii; + ab[i__3].r = z__1.r, ab[i__3].i = z__1.i; +/* L250: */ + } +/* Computing MAX */ + i__2 = 1, i__3 = i__ - *ka; + i__4 = i__ - 1; + for (j = f2cmax(i__2,i__3); j <= i__4; ++j) { + i__2 = i__ - j + 1 + j * ab_dim1; + i__3 = i__ - j + 1 + j * ab_dim1; + z__1.r = ab[i__3].r / bii, z__1.i = ab[i__3].i / bii; + ab[i__2].r = z__1.r, ab[i__2].i = z__1.i; +/* L260: */ + } + i__4 = i__ - 1; + for (k = i__ - kbt; k <= i__4; ++k) { + i__2 = k; + for (j = i__ - kbt; j <= i__2; ++j) { + i__3 = k - j + 1 + j * ab_dim1; + i__1 = k - j + 1 + j * ab_dim1; + i__5 = i__ - j + 1 + j * bb_dim1; + d_cnjg(&z__5, &ab[i__ - k + 1 + k * ab_dim1]); + z__4.r = bb[i__5].r * z__5.r - bb[i__5].i * z__5.i, + z__4.i = bb[i__5].r * z__5.i + bb[i__5].i * + z__5.r; + z__3.r = ab[i__1].r - z__4.r, z__3.i = ab[i__1].i - + z__4.i; + d_cnjg(&z__7, &bb[i__ - k + 1 + k * bb_dim1]); + i__6 = i__ - j + 1 + j * ab_dim1; + z__6.r = z__7.r * ab[i__6].r - z__7.i * ab[i__6].i, + z__6.i = z__7.r * ab[i__6].i + z__7.i * ab[i__6] + .r; + z__2.r = z__3.r - z__6.r, z__2.i = z__3.i - z__6.i; + i__7 = i__ * ab_dim1 + 1; + d__1 = ab[i__7].r; + i__8 = i__ - j + 1 + j * bb_dim1; + z__9.r = d__1 * bb[i__8].r, z__9.i = d__1 * bb[i__8].i; + d_cnjg(&z__10, &bb[i__ - k + 1 + k * bb_dim1]); + z__8.r = z__9.r * z__10.r - z__9.i * z__10.i, z__8.i = + z__9.r * z__10.i + z__9.i * z__10.r; + z__1.r = z__2.r + z__8.r, z__1.i = z__2.i + z__8.i; + ab[i__3].r = z__1.r, ab[i__3].i = z__1.i; +/* L270: */ + } +/* Computing MAX */ + i__2 = 1, i__3 = i__ - *ka; + i__1 = i__ - kbt - 1; + for (j = f2cmax(i__2,i__3); j <= i__1; ++j) { + i__2 = k - j + 1 + j * ab_dim1; + i__3 = k - j + 1 + j * ab_dim1; + d_cnjg(&z__3, &bb[i__ - k + 1 + k * bb_dim1]); + i__5 = i__ - j + 1 + j * ab_dim1; + z__2.r = z__3.r * ab[i__5].r - z__3.i * ab[i__5].i, + z__2.i = z__3.r * ab[i__5].i + z__3.i * ab[i__5] + .r; + z__1.r = ab[i__3].r - z__2.r, z__1.i = ab[i__3].i - + z__2.i; + ab[i__2].r = z__1.r, ab[i__2].i = z__1.i; +/* L280: */ + } +/* L290: */ + } + i__4 = i1; + for (j = i__; j <= i__4; ++j) { +/* Computing MAX */ + i__1 = j - *ka, i__2 = i__ - kbt; + i__3 = i__ - 1; + for (k = f2cmax(i__1,i__2); k <= i__3; ++k) { + i__1 = j - k + 1 + k * ab_dim1; + i__2 = j - k + 1 + k * ab_dim1; + i__5 = i__ - k + 1 + k * bb_dim1; + i__6 = j - i__ + 1 + i__ * ab_dim1; + z__2.r = bb[i__5].r * ab[i__6].r - bb[i__5].i * ab[i__6] + .i, z__2.i = bb[i__5].r * ab[i__6].i + bb[i__5].i + * ab[i__6].r; + z__1.r = ab[i__2].r - z__2.r, z__1.i = ab[i__2].i - + z__2.i; + ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; +/* L300: */ + } +/* L310: */ + } + + if (wantx) { + +/* post-multiply X by inv(S(i)) */ + + i__4 = *n - m; + d__1 = 1. / bii; + zdscal_(&i__4, &d__1, &x[m + 1 + i__ * x_dim1], &c__1); + if (kbt > 0) { + i__4 = *n - m; + z__1.r = -1., z__1.i = 0.; + i__3 = *ldbb - 1; + zgeru_(&i__4, &kbt, &z__1, &x[m + 1 + i__ * x_dim1], & + c__1, &bb[kbt + 1 + (i__ - kbt) * bb_dim1], &i__3, + &x[m + 1 + (i__ - kbt) * x_dim1], ldx); + } + } + +/* store a(i1,i) in RA1 for use in next loop over K */ + + i__4 = i1 - i__ + 1 + i__ * ab_dim1; + ra1.r = ab[i__4].r, ra1.i = ab[i__4].i; + } + +/* Generate and apply vectors of rotations to chase all the */ +/* existing bulges KA positions down toward the bottom of the */ +/* band */ + + i__4 = *kb - 1; + for (k = 1; k <= i__4; ++k) { + if (update) { + +/* Determine the rotations which would annihilate the bulge */ +/* which has in theory just been created */ + + if (i__ - k + *ka < *n && i__ - k > 1) { + +/* generate rotation to annihilate a(i-k+ka+1,i) */ + + zlartg_(&ab[ka1 - k + i__ * ab_dim1], &ra1, &rwork[i__ - + k + *ka - m], &work[i__ - k + *ka - m], &ra); + +/* create nonzero element a(i-k+ka+1,i-k) outside the */ +/* band and store it in WORK(i-k) */ + + i__3 = k + 1 + (i__ - k) * bb_dim1; + z__2.r = -bb[i__3].r, z__2.i = -bb[i__3].i; + z__1.r = z__2.r * ra1.r - z__2.i * ra1.i, z__1.i = z__2.r + * ra1.i + z__2.i * ra1.r; + t.r = z__1.r, t.i = z__1.i; + i__3 = i__ - k; + i__1 = i__ - k + *ka - m; + z__2.r = rwork[i__1] * t.r, z__2.i = rwork[i__1] * t.i; + d_cnjg(&z__4, &work[i__ - k + *ka - m]); + i__2 = ka1 + (i__ - k) * ab_dim1; + z__3.r = z__4.r * ab[i__2].r - z__4.i * ab[i__2].i, + z__3.i = z__4.r * ab[i__2].i + z__4.i * ab[i__2] + .r; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; + i__3 = ka1 + (i__ - k) * ab_dim1; + i__1 = i__ - k + *ka - m; + z__2.r = work[i__1].r * t.r - work[i__1].i * t.i, z__2.i = + work[i__1].r * t.i + work[i__1].i * t.r; + i__2 = i__ - k + *ka - m; + i__5 = ka1 + (i__ - k) * ab_dim1; + z__3.r = rwork[i__2] * ab[i__5].r, z__3.i = rwork[i__2] * + ab[i__5].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + ab[i__3].r = z__1.r, ab[i__3].i = z__1.i; + ra1.r = ra.r, ra1.i = ra.i; + } + } +/* Computing MAX */ + i__3 = 1, i__1 = k - i0 + 2; + j2 = i__ - k - 1 + f2cmax(i__3,i__1) * ka1; + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + if (update) { +/* Computing MAX */ + i__3 = j2, i__1 = i__ + (*ka << 1) - k + 1; + j2t = f2cmax(i__3,i__1); + } else { + j2t = j2; + } + nrt = (*n - j2t + *ka) / ka1; + i__3 = j1; + i__1 = ka1; + for (j = j2t; i__1 < 0 ? j >= i__3 : j <= i__3; j += i__1) { + +/* create nonzero element a(j+1,j-ka) outside the band */ +/* and store it in WORK(j-m) */ + + i__2 = j - m; + i__5 = j - m; + i__6 = ka1 + (j - *ka + 1) * ab_dim1; + z__1.r = work[i__5].r * ab[i__6].r - work[i__5].i * ab[i__6] + .i, z__1.i = work[i__5].r * ab[i__6].i + work[i__5].i + * ab[i__6].r; + work[i__2].r = z__1.r, work[i__2].i = z__1.i; + i__2 = ka1 + (j - *ka + 1) * ab_dim1; + i__5 = j - m; + i__6 = ka1 + (j - *ka + 1) * ab_dim1; + z__1.r = rwork[i__5] * ab[i__6].r, z__1.i = rwork[i__5] * ab[ + i__6].i; + ab[i__2].r = z__1.r, ab[i__2].i = z__1.i; +/* L320: */ + } + +/* generate rotations in 1st set to annihilate elements which */ +/* have been created outside the band */ + + if (nrt > 0) { + zlargv_(&nrt, &ab[ka1 + (j2t - *ka) * ab_dim1], &inca, &work[ + j2t - m], &ka1, &rwork[j2t - m], &ka1); + } + if (nr > 0) { + +/* apply rotations in 1st set from the left */ + + i__1 = *ka - 1; + for (l = 1; l <= i__1; ++l) { + zlartv_(&nr, &ab[l + 1 + (j2 - l) * ab_dim1], &inca, &ab[ + l + 2 + (j2 - l) * ab_dim1], &inca, &rwork[j2 - m] + , &work[j2 - m], &ka1); +/* L330: */ + } + +/* apply rotations in 1st set from both sides to diagonal */ +/* blocks */ + + zlar2v_(&nr, &ab[j2 * ab_dim1 + 1], &ab[(j2 + 1) * ab_dim1 + + 1], &ab[j2 * ab_dim1 + 2], &inca, &rwork[j2 - m], & + work[j2 - m], &ka1); + + zlacgv_(&nr, &work[j2 - m], &ka1); + } + +/* start applying rotations in 1st set from the right */ + + i__1 = *kb - k + 1; + for (l = *ka - 1; l >= i__1; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + zlartv_(&nrt, &ab[ka1 - l + 1 + j2 * ab_dim1], &inca, &ab[ + ka1 - l + (j2 + 1) * ab_dim1], &inca, &rwork[j2 - + m], &work[j2 - m], &ka1); + } +/* L340: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 1st set */ + + i__1 = j1; + i__3 = ka1; + for (j = j2; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { + i__2 = *n - m; + zrot_(&i__2, &x[m + 1 + j * x_dim1], &c__1, &x[m + 1 + (j + + 1) * x_dim1], &c__1, &rwork[j - m], &work[j - m] + ); +/* L350: */ + } + } +/* L360: */ + } + + if (update) { + if (i2 <= *n && kbt > 0) { + +/* create nonzero element a(i-kbt+ka+1,i-kbt) outside the */ +/* band and store it in WORK(i-kbt) */ + + i__4 = i__ - kbt; + i__3 = kbt + 1 + (i__ - kbt) * bb_dim1; + z__2.r = -bb[i__3].r, z__2.i = -bb[i__3].i; + z__1.r = z__2.r * ra1.r - z__2.i * ra1.i, z__1.i = z__2.r * + ra1.i + z__2.i * ra1.r; + work[i__4].r = z__1.r, work[i__4].i = z__1.i; + } + } + + for (k = *kb; k >= 1; --k) { + if (update) { +/* Computing MAX */ + i__4 = 2, i__3 = k - i0 + 1; + j2 = i__ - k - 1 + f2cmax(i__4,i__3) * ka1; + } else { +/* Computing MAX */ + i__4 = 1, i__3 = k - i0 + 1; + j2 = i__ - k - 1 + f2cmax(i__4,i__3) * ka1; + } + +/* finish applying rotations in 2nd set from the right */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (*n - j2 + *ka + l) / ka1; + if (nrt > 0) { + zlartv_(&nrt, &ab[ka1 - l + 1 + (j2 - *ka) * ab_dim1], & + inca, &ab[ka1 - l + (j2 - *ka + 1) * ab_dim1], & + inca, &rwork[j2 - *ka], &work[j2 - *ka], &ka1); + } +/* L370: */ + } + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + i__4 = j2; + i__3 = -ka1; + for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { + i__1 = j; + i__2 = j - *ka; + work[i__1].r = work[i__2].r, work[i__1].i = work[i__2].i; + rwork[j] = rwork[j - *ka]; +/* L380: */ + } + i__3 = j1; + i__4 = ka1; + for (j = j2; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { + +/* create nonzero element a(j+1,j-ka) outside the band */ +/* and store it in WORK(j) */ + + i__1 = j; + i__2 = j; + i__5 = ka1 + (j - *ka + 1) * ab_dim1; + z__1.r = work[i__2].r * ab[i__5].r - work[i__2].i * ab[i__5] + .i, z__1.i = work[i__2].r * ab[i__5].i + work[i__2].i + * ab[i__5].r; + work[i__1].r = z__1.r, work[i__1].i = z__1.i; + i__1 = ka1 + (j - *ka + 1) * ab_dim1; + i__2 = j; + i__5 = ka1 + (j - *ka + 1) * ab_dim1; + z__1.r = rwork[i__2] * ab[i__5].r, z__1.i = rwork[i__2] * ab[ + i__5].i; + ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; +/* L390: */ + } + if (update) { + if (i__ - k < *n - *ka && k <= kbt) { + i__4 = i__ - k + *ka; + i__3 = i__ - k; + work[i__4].r = work[i__3].r, work[i__4].i = work[i__3].i; + } + } +/* L400: */ + } + + for (k = *kb; k >= 1; --k) { +/* Computing MAX */ + i__4 = 1, i__3 = k - i0 + 1; + j2 = i__ - k - 1 + f2cmax(i__4,i__3) * ka1; + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + if (nr > 0) { + +/* generate rotations in 2nd set to annihilate elements */ +/* which have been created outside the band */ + + zlargv_(&nr, &ab[ka1 + (j2 - *ka) * ab_dim1], &inca, &work[j2] + , &ka1, &rwork[j2], &ka1); + +/* apply rotations in 2nd set from the left */ + + i__4 = *ka - 1; + for (l = 1; l <= i__4; ++l) { + zlartv_(&nr, &ab[l + 1 + (j2 - l) * ab_dim1], &inca, &ab[ + l + 2 + (j2 - l) * ab_dim1], &inca, &rwork[j2], & + work[j2], &ka1); +/* L410: */ + } + +/* apply rotations in 2nd set from both sides to diagonal */ +/* blocks */ + + zlar2v_(&nr, &ab[j2 * ab_dim1 + 1], &ab[(j2 + 1) * ab_dim1 + + 1], &ab[j2 * ab_dim1 + 2], &inca, &rwork[j2], &work[ + j2], &ka1); + + zlacgv_(&nr, &work[j2], &ka1); + } + +/* start applying rotations in 2nd set from the right */ + + i__4 = *kb - k + 1; + for (l = *ka - 1; l >= i__4; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + zlartv_(&nrt, &ab[ka1 - l + 1 + j2 * ab_dim1], &inca, &ab[ + ka1 - l + (j2 + 1) * ab_dim1], &inca, &rwork[j2], + &work[j2], &ka1); + } +/* L420: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 2nd set */ + + i__4 = j1; + i__3 = ka1; + for (j = j2; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { + i__1 = *n - m; + zrot_(&i__1, &x[m + 1 + j * x_dim1], &c__1, &x[m + 1 + (j + + 1) * x_dim1], &c__1, &rwork[j], &work[j]); +/* L430: */ + } + } +/* L440: */ + } + + i__3 = *kb - 1; + for (k = 1; k <= i__3; ++k) { +/* Computing MAX */ + i__4 = 1, i__1 = k - i0 + 2; + j2 = i__ - k - 1 + f2cmax(i__4,i__1) * ka1; + +/* finish applying rotations in 1st set from the right */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + zlartv_(&nrt, &ab[ka1 - l + 1 + j2 * ab_dim1], &inca, &ab[ + ka1 - l + (j2 + 1) * ab_dim1], &inca, &rwork[j2 - + m], &work[j2 - m], &ka1); + } +/* L450: */ + } +/* L460: */ + } + + if (*kb > 1) { + i__3 = j2 + *ka; + for (j = *n - 1; j >= i__3; --j) { + rwork[j - m] = rwork[j - *ka - m]; + i__4 = j - m; + i__1 = j - *ka - m; + work[i__4].r = work[i__1].r, work[i__4].i = work[i__1].i; +/* L470: */ + } + } + + } + + goto L10; + +L480: + +/* **************************** Phase 2 ***************************** */ + +/* The logical structure of this phase is: */ + +/* UPDATE = .TRUE. */ +/* DO I = 1, M */ +/* use S(i) to update A and create a new bulge */ +/* apply rotations to push all bulges KA positions upward */ +/* END DO */ +/* UPDATE = .FALSE. */ +/* DO I = M - KA - 1, 2, -1 */ +/* apply rotations to push all bulges KA positions upward */ +/* END DO */ + +/* To avoid duplicating code, the two loops are merged. */ + + update = TRUE_; + i__ = 0; +L490: + if (update) { + ++i__; +/* Computing MIN */ + i__3 = *kb, i__4 = m - i__; + kbt = f2cmin(i__3,i__4); + i0 = i__ + 1; +/* Computing MAX */ + i__3 = 1, i__4 = i__ - *ka; + i1 = f2cmax(i__3,i__4); + i2 = i__ + kbt - ka1; + if (i__ > m) { + update = FALSE_; + --i__; + i0 = m + 1; + if (*ka == 0) { + return 0; + } + goto L490; + } + } else { + i__ -= *ka; + if (i__ < 2) { + return 0; + } + } + + if (i__ < m - kbt) { + nx = m; + } else { + nx = *n; + } + + if (upper) { + +/* Transform A, working with the upper triangle */ + + if (update) { + +/* Form inv(S(i))**H * A * inv(S(i)) */ + + i__3 = kb1 + i__ * bb_dim1; + bii = bb[i__3].r; + i__3 = ka1 + i__ * ab_dim1; + i__4 = ka1 + i__ * ab_dim1; + d__1 = ab[i__4].r / bii / bii; + ab[i__3].r = d__1, ab[i__3].i = 0.; + i__3 = i__ - 1; + for (j = i1; j <= i__3; ++j) { + i__4 = j - i__ + ka1 + i__ * ab_dim1; + i__1 = j - i__ + ka1 + i__ * ab_dim1; + z__1.r = ab[i__1].r / bii, z__1.i = ab[i__1].i / bii; + ab[i__4].r = z__1.r, ab[i__4].i = z__1.i; +/* L500: */ + } +/* Computing MIN */ + i__4 = *n, i__1 = i__ + *ka; + i__3 = f2cmin(i__4,i__1); + for (j = i__ + 1; j <= i__3; ++j) { + i__4 = i__ - j + ka1 + j * ab_dim1; + i__1 = i__ - j + ka1 + j * ab_dim1; + z__1.r = ab[i__1].r / bii, z__1.i = ab[i__1].i / bii; + ab[i__4].r = z__1.r, ab[i__4].i = z__1.i; +/* L510: */ + } + i__3 = i__ + kbt; + for (k = i__ + 1; k <= i__3; ++k) { + i__4 = i__ + kbt; + for (j = k; j <= i__4; ++j) { + i__1 = k - j + ka1 + j * ab_dim1; + i__2 = k - j + ka1 + j * ab_dim1; + i__5 = i__ - j + kb1 + j * bb_dim1; + d_cnjg(&z__5, &ab[i__ - k + ka1 + k * ab_dim1]); + z__4.r = bb[i__5].r * z__5.r - bb[i__5].i * z__5.i, + z__4.i = bb[i__5].r * z__5.i + bb[i__5].i * + z__5.r; + z__3.r = ab[i__2].r - z__4.r, z__3.i = ab[i__2].i - + z__4.i; + d_cnjg(&z__7, &bb[i__ - k + kb1 + k * bb_dim1]); + i__6 = i__ - j + ka1 + j * ab_dim1; + z__6.r = z__7.r * ab[i__6].r - z__7.i * ab[i__6].i, + z__6.i = z__7.r * ab[i__6].i + z__7.i * ab[i__6] + .r; + z__2.r = z__3.r - z__6.r, z__2.i = z__3.i - z__6.i; + i__7 = ka1 + i__ * ab_dim1; + d__1 = ab[i__7].r; + i__8 = i__ - j + kb1 + j * bb_dim1; + z__9.r = d__1 * bb[i__8].r, z__9.i = d__1 * bb[i__8].i; + d_cnjg(&z__10, &bb[i__ - k + kb1 + k * bb_dim1]); + z__8.r = z__9.r * z__10.r - z__9.i * z__10.i, z__8.i = + z__9.r * z__10.i + z__9.i * z__10.r; + z__1.r = z__2.r + z__8.r, z__1.i = z__2.i + z__8.i; + ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; +/* L520: */ + } +/* Computing MIN */ + i__1 = *n, i__2 = i__ + *ka; + i__4 = f2cmin(i__1,i__2); + for (j = i__ + kbt + 1; j <= i__4; ++j) { + i__1 = k - j + ka1 + j * ab_dim1; + i__2 = k - j + ka1 + j * ab_dim1; + d_cnjg(&z__3, &bb[i__ - k + kb1 + k * bb_dim1]); + i__5 = i__ - j + ka1 + j * ab_dim1; + z__2.r = z__3.r * ab[i__5].r - z__3.i * ab[i__5].i, + z__2.i = z__3.r * ab[i__5].i + z__3.i * ab[i__5] + .r; + z__1.r = ab[i__2].r - z__2.r, z__1.i = ab[i__2].i - + z__2.i; + ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; +/* L530: */ + } +/* L540: */ + } + i__3 = i__; + for (j = i1; j <= i__3; ++j) { +/* Computing MIN */ + i__1 = j + *ka, i__2 = i__ + kbt; + i__4 = f2cmin(i__1,i__2); + for (k = i__ + 1; k <= i__4; ++k) { + i__1 = j - k + ka1 + k * ab_dim1; + i__2 = j - k + ka1 + k * ab_dim1; + i__5 = i__ - k + kb1 + k * bb_dim1; + i__6 = j - i__ + ka1 + i__ * ab_dim1; + z__2.r = bb[i__5].r * ab[i__6].r - bb[i__5].i * ab[i__6] + .i, z__2.i = bb[i__5].r * ab[i__6].i + bb[i__5].i + * ab[i__6].r; + z__1.r = ab[i__2].r - z__2.r, z__1.i = ab[i__2].i - + z__2.i; + ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; +/* L550: */ + } +/* L560: */ + } + + if (wantx) { + +/* post-multiply X by inv(S(i)) */ + + d__1 = 1. / bii; + zdscal_(&nx, &d__1, &x[i__ * x_dim1 + 1], &c__1); + if (kbt > 0) { + z__1.r = -1., z__1.i = 0.; + i__3 = *ldbb - 1; + zgeru_(&nx, &kbt, &z__1, &x[i__ * x_dim1 + 1], &c__1, &bb[ + *kb + (i__ + 1) * bb_dim1], &i__3, &x[(i__ + 1) * + x_dim1 + 1], ldx); + } + } + +/* store a(i1,i) in RA1 for use in next loop over K */ + + i__3 = i1 - i__ + ka1 + i__ * ab_dim1; + ra1.r = ab[i__3].r, ra1.i = ab[i__3].i; + } + +/* Generate and apply vectors of rotations to chase all the */ +/* existing bulges KA positions up toward the top of the band */ + + i__3 = *kb - 1; + for (k = 1; k <= i__3; ++k) { + if (update) { + +/* Determine the rotations which would annihilate the bulge */ +/* which has in theory just been created */ + + if (i__ + k - ka1 > 0 && i__ + k < m) { + +/* generate rotation to annihilate a(i+k-ka-1,i) */ + + zlartg_(&ab[k + 1 + i__ * ab_dim1], &ra1, &rwork[i__ + k + - *ka], &work[i__ + k - *ka], &ra); + +/* create nonzero element a(i+k-ka-1,i+k) outside the */ +/* band and store it in WORK(m-kb+i+k) */ + + i__4 = kb1 - k + (i__ + k) * bb_dim1; + z__2.r = -bb[i__4].r, z__2.i = -bb[i__4].i; + z__1.r = z__2.r * ra1.r - z__2.i * ra1.i, z__1.i = z__2.r + * ra1.i + z__2.i * ra1.r; + t.r = z__1.r, t.i = z__1.i; + i__4 = m - *kb + i__ + k; + i__1 = i__ + k - *ka; + z__2.r = rwork[i__1] * t.r, z__2.i = rwork[i__1] * t.i; + d_cnjg(&z__4, &work[i__ + k - *ka]); + i__2 = (i__ + k) * ab_dim1 + 1; + z__3.r = z__4.r * ab[i__2].r - z__4.i * ab[i__2].i, + z__3.i = z__4.r * ab[i__2].i + z__4.i * ab[i__2] + .r; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + work[i__4].r = z__1.r, work[i__4].i = z__1.i; + i__4 = (i__ + k) * ab_dim1 + 1; + i__1 = i__ + k - *ka; + z__2.r = work[i__1].r * t.r - work[i__1].i * t.i, z__2.i = + work[i__1].r * t.i + work[i__1].i * t.r; + i__2 = i__ + k - *ka; + i__5 = (i__ + k) * ab_dim1 + 1; + z__3.r = rwork[i__2] * ab[i__5].r, z__3.i = rwork[i__2] * + ab[i__5].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + ab[i__4].r = z__1.r, ab[i__4].i = z__1.i; + ra1.r = ra.r, ra1.i = ra.i; + } + } +/* Computing MAX */ + i__4 = 1, i__1 = k + i0 - m + 1; + j2 = i__ + k + 1 - f2cmax(i__4,i__1) * ka1; + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + if (update) { +/* Computing MIN */ + i__4 = j2, i__1 = i__ - (*ka << 1) + k - 1; + j2t = f2cmin(i__4,i__1); + } else { + j2t = j2; + } + nrt = (j2t + *ka - 1) / ka1; + i__4 = j2t; + i__1 = ka1; + for (j = j1; i__1 < 0 ? j >= i__4 : j <= i__4; j += i__1) { + +/* create nonzero element a(j-1,j+ka) outside the band */ +/* and store it in WORK(j) */ + + i__2 = j; + i__5 = j; + i__6 = (j + *ka - 1) * ab_dim1 + 1; + z__1.r = work[i__5].r * ab[i__6].r - work[i__5].i * ab[i__6] + .i, z__1.i = work[i__5].r * ab[i__6].i + work[i__5].i + * ab[i__6].r; + work[i__2].r = z__1.r, work[i__2].i = z__1.i; + i__2 = (j + *ka - 1) * ab_dim1 + 1; + i__5 = j; + i__6 = (j + *ka - 1) * ab_dim1 + 1; + z__1.r = rwork[i__5] * ab[i__6].r, z__1.i = rwork[i__5] * ab[ + i__6].i; + ab[i__2].r = z__1.r, ab[i__2].i = z__1.i; +/* L570: */ + } + +/* generate rotations in 1st set to annihilate elements which */ +/* have been created outside the band */ + + if (nrt > 0) { + zlargv_(&nrt, &ab[(j1 + *ka) * ab_dim1 + 1], &inca, &work[j1], + &ka1, &rwork[j1], &ka1); + } + if (nr > 0) { + +/* apply rotations in 1st set from the left */ + + i__1 = *ka - 1; + for (l = 1; l <= i__1; ++l) { + zlartv_(&nr, &ab[ka1 - l + (j1 + l) * ab_dim1], &inca, & + ab[*ka - l + (j1 + l) * ab_dim1], &inca, &rwork[ + j1], &work[j1], &ka1); +/* L580: */ + } + +/* apply rotations in 1st set from both sides to diagonal */ +/* blocks */ + + zlar2v_(&nr, &ab[ka1 + j1 * ab_dim1], &ab[ka1 + (j1 - 1) * + ab_dim1], &ab[*ka + j1 * ab_dim1], &inca, &rwork[j1], + &work[j1], &ka1); + + zlacgv_(&nr, &work[j1], &ka1); + } + +/* start applying rotations in 1st set from the right */ + + i__1 = *kb - k + 1; + for (l = *ka - 1; l >= i__1; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + zlartv_(&nrt, &ab[l + j1t * ab_dim1], &inca, &ab[l + 1 + ( + j1t - 1) * ab_dim1], &inca, &rwork[j1t], &work[ + j1t], &ka1); + } +/* L590: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 1st set */ + + i__1 = j2; + i__4 = ka1; + for (j = j1; i__4 < 0 ? j >= i__1 : j <= i__1; j += i__4) { + zrot_(&nx, &x[j * x_dim1 + 1], &c__1, &x[(j - 1) * x_dim1 + + 1], &c__1, &rwork[j], &work[j]); +/* L600: */ + } + } +/* L610: */ + } + + if (update) { + if (i2 > 0 && kbt > 0) { + +/* create nonzero element a(i+kbt-ka-1,i+kbt) outside the */ +/* band and store it in WORK(m-kb+i+kbt) */ + + i__3 = m - *kb + i__ + kbt; + i__4 = kb1 - kbt + (i__ + kbt) * bb_dim1; + z__2.r = -bb[i__4].r, z__2.i = -bb[i__4].i; + z__1.r = z__2.r * ra1.r - z__2.i * ra1.i, z__1.i = z__2.r * + ra1.i + z__2.i * ra1.r; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; + } + } + + for (k = *kb; k >= 1; --k) { + if (update) { +/* Computing MAX */ + i__3 = 2, i__4 = k + i0 - m; + j2 = i__ + k + 1 - f2cmax(i__3,i__4) * ka1; + } else { +/* Computing MAX */ + i__3 = 1, i__4 = k + i0 - m; + j2 = i__ + k + 1 - f2cmax(i__3,i__4) * ka1; + } + +/* finish applying rotations in 2nd set from the right */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (j2 + *ka + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + zlartv_(&nrt, &ab[l + (j1t + *ka) * ab_dim1], &inca, &ab[ + l + 1 + (j1t + *ka - 1) * ab_dim1], &inca, &rwork[ + m - *kb + j1t + *ka], &work[m - *kb + j1t + *ka], + &ka1); + } +/* L620: */ + } + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + i__3 = j2; + i__4 = ka1; + for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { + i__1 = m - *kb + j; + i__2 = m - *kb + j + *ka; + work[i__1].r = work[i__2].r, work[i__1].i = work[i__2].i; + rwork[m - *kb + j] = rwork[m - *kb + j + *ka]; +/* L630: */ + } + i__4 = j2; + i__3 = ka1; + for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { + +/* create nonzero element a(j-1,j+ka) outside the band */ +/* and store it in WORK(m-kb+j) */ + + i__1 = m - *kb + j; + i__2 = m - *kb + j; + i__5 = (j + *ka - 1) * ab_dim1 + 1; + z__1.r = work[i__2].r * ab[i__5].r - work[i__2].i * ab[i__5] + .i, z__1.i = work[i__2].r * ab[i__5].i + work[i__2].i + * ab[i__5].r; + work[i__1].r = z__1.r, work[i__1].i = z__1.i; + i__1 = (j + *ka - 1) * ab_dim1 + 1; + i__2 = m - *kb + j; + i__5 = (j + *ka - 1) * ab_dim1 + 1; + z__1.r = rwork[i__2] * ab[i__5].r, z__1.i = rwork[i__2] * ab[ + i__5].i; + ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; +/* L640: */ + } + if (update) { + if (i__ + k > ka1 && k <= kbt) { + i__3 = m - *kb + i__ + k - *ka; + i__4 = m - *kb + i__ + k; + work[i__3].r = work[i__4].r, work[i__3].i = work[i__4].i; + } + } +/* L650: */ + } + + for (k = *kb; k >= 1; --k) { +/* Computing MAX */ + i__3 = 1, i__4 = k + i0 - m; + j2 = i__ + k + 1 - f2cmax(i__3,i__4) * ka1; + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + if (nr > 0) { + +/* generate rotations in 2nd set to annihilate elements */ +/* which have been created outside the band */ + + zlargv_(&nr, &ab[(j1 + *ka) * ab_dim1 + 1], &inca, &work[m - * + kb + j1], &ka1, &rwork[m - *kb + j1], &ka1); + +/* apply rotations in 2nd set from the left */ + + i__3 = *ka - 1; + for (l = 1; l <= i__3; ++l) { + zlartv_(&nr, &ab[ka1 - l + (j1 + l) * ab_dim1], &inca, & + ab[*ka - l + (j1 + l) * ab_dim1], &inca, &rwork[m + - *kb + j1], &work[m - *kb + j1], &ka1); +/* L660: */ + } + +/* apply rotations in 2nd set from both sides to diagonal */ +/* blocks */ + + zlar2v_(&nr, &ab[ka1 + j1 * ab_dim1], &ab[ka1 + (j1 - 1) * + ab_dim1], &ab[*ka + j1 * ab_dim1], &inca, &rwork[m - * + kb + j1], &work[m - *kb + j1], &ka1); + + zlacgv_(&nr, &work[m - *kb + j1], &ka1); + } + +/* start applying rotations in 2nd set from the right */ + + i__3 = *kb - k + 1; + for (l = *ka - 1; l >= i__3; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + zlartv_(&nrt, &ab[l + j1t * ab_dim1], &inca, &ab[l + 1 + ( + j1t - 1) * ab_dim1], &inca, &rwork[m - *kb + j1t], + &work[m - *kb + j1t], &ka1); + } +/* L670: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 2nd set */ + + i__3 = j2; + i__4 = ka1; + for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { + zrot_(&nx, &x[j * x_dim1 + 1], &c__1, &x[(j - 1) * x_dim1 + + 1], &c__1, &rwork[m - *kb + j], &work[m - *kb + + j]); +/* L680: */ + } + } +/* L690: */ + } + + i__4 = *kb - 1; + for (k = 1; k <= i__4; ++k) { +/* Computing MAX */ + i__3 = 1, i__1 = k + i0 - m + 1; + j2 = i__ + k + 1 - f2cmax(i__3,i__1) * ka1; + +/* finish applying rotations in 1st set from the right */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + zlartv_(&nrt, &ab[l + j1t * ab_dim1], &inca, &ab[l + 1 + ( + j1t - 1) * ab_dim1], &inca, &rwork[j1t], &work[ + j1t], &ka1); + } +/* L700: */ + } +/* L710: */ + } + + if (*kb > 1) { + i__4 = i2 - *ka; + for (j = 2; j <= i__4; ++j) { + rwork[j] = rwork[j + *ka]; + i__3 = j; + i__1 = j + *ka; + work[i__3].r = work[i__1].r, work[i__3].i = work[i__1].i; +/* L720: */ + } + } + + } else { + +/* Transform A, working with the lower triangle */ + + if (update) { + +/* Form inv(S(i))**H * A * inv(S(i)) */ + + i__4 = i__ * bb_dim1 + 1; + bii = bb[i__4].r; + i__4 = i__ * ab_dim1 + 1; + i__3 = i__ * ab_dim1 + 1; + d__1 = ab[i__3].r / bii / bii; + ab[i__4].r = d__1, ab[i__4].i = 0.; + i__4 = i__ - 1; + for (j = i1; j <= i__4; ++j) { + i__3 = i__ - j + 1 + j * ab_dim1; + i__1 = i__ - j + 1 + j * ab_dim1; + z__1.r = ab[i__1].r / bii, z__1.i = ab[i__1].i / bii; + ab[i__3].r = z__1.r, ab[i__3].i = z__1.i; +/* L730: */ + } +/* Computing MIN */ + i__3 = *n, i__1 = i__ + *ka; + i__4 = f2cmin(i__3,i__1); + for (j = i__ + 1; j <= i__4; ++j) { + i__3 = j - i__ + 1 + i__ * ab_dim1; + i__1 = j - i__ + 1 + i__ * ab_dim1; + z__1.r = ab[i__1].r / bii, z__1.i = ab[i__1].i / bii; + ab[i__3].r = z__1.r, ab[i__3].i = z__1.i; +/* L740: */ + } + i__4 = i__ + kbt; + for (k = i__ + 1; k <= i__4; ++k) { + i__3 = i__ + kbt; + for (j = k; j <= i__3; ++j) { + i__1 = j - k + 1 + k * ab_dim1; + i__2 = j - k + 1 + k * ab_dim1; + i__5 = j - i__ + 1 + i__ * bb_dim1; + d_cnjg(&z__5, &ab[k - i__ + 1 + i__ * ab_dim1]); + z__4.r = bb[i__5].r * z__5.r - bb[i__5].i * z__5.i, + z__4.i = bb[i__5].r * z__5.i + bb[i__5].i * + z__5.r; + z__3.r = ab[i__2].r - z__4.r, z__3.i = ab[i__2].i - + z__4.i; + d_cnjg(&z__7, &bb[k - i__ + 1 + i__ * bb_dim1]); + i__6 = j - i__ + 1 + i__ * ab_dim1; + z__6.r = z__7.r * ab[i__6].r - z__7.i * ab[i__6].i, + z__6.i = z__7.r * ab[i__6].i + z__7.i * ab[i__6] + .r; + z__2.r = z__3.r - z__6.r, z__2.i = z__3.i - z__6.i; + i__7 = i__ * ab_dim1 + 1; + d__1 = ab[i__7].r; + i__8 = j - i__ + 1 + i__ * bb_dim1; + z__9.r = d__1 * bb[i__8].r, z__9.i = d__1 * bb[i__8].i; + d_cnjg(&z__10, &bb[k - i__ + 1 + i__ * bb_dim1]); + z__8.r = z__9.r * z__10.r - z__9.i * z__10.i, z__8.i = + z__9.r * z__10.i + z__9.i * z__10.r; + z__1.r = z__2.r + z__8.r, z__1.i = z__2.i + z__8.i; + ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; +/* L750: */ + } +/* Computing MIN */ + i__1 = *n, i__2 = i__ + *ka; + i__3 = f2cmin(i__1,i__2); + for (j = i__ + kbt + 1; j <= i__3; ++j) { + i__1 = j - k + 1 + k * ab_dim1; + i__2 = j - k + 1 + k * ab_dim1; + d_cnjg(&z__3, &bb[k - i__ + 1 + i__ * bb_dim1]); + i__5 = j - i__ + 1 + i__ * ab_dim1; + z__2.r = z__3.r * ab[i__5].r - z__3.i * ab[i__5].i, + z__2.i = z__3.r * ab[i__5].i + z__3.i * ab[i__5] + .r; + z__1.r = ab[i__2].r - z__2.r, z__1.i = ab[i__2].i - + z__2.i; + ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; +/* L760: */ + } +/* L770: */ + } + i__4 = i__; + for (j = i1; j <= i__4; ++j) { +/* Computing MIN */ + i__1 = j + *ka, i__2 = i__ + kbt; + i__3 = f2cmin(i__1,i__2); + for (k = i__ + 1; k <= i__3; ++k) { + i__1 = k - j + 1 + j * ab_dim1; + i__2 = k - j + 1 + j * ab_dim1; + i__5 = k - i__ + 1 + i__ * bb_dim1; + i__6 = i__ - j + 1 + j * ab_dim1; + z__2.r = bb[i__5].r * ab[i__6].r - bb[i__5].i * ab[i__6] + .i, z__2.i = bb[i__5].r * ab[i__6].i + bb[i__5].i + * ab[i__6].r; + z__1.r = ab[i__2].r - z__2.r, z__1.i = ab[i__2].i - + z__2.i; + ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; +/* L780: */ + } +/* L790: */ + } + + if (wantx) { + +/* post-multiply X by inv(S(i)) */ + + d__1 = 1. / bii; + zdscal_(&nx, &d__1, &x[i__ * x_dim1 + 1], &c__1); + if (kbt > 0) { + z__1.r = -1., z__1.i = 0.; + zgerc_(&nx, &kbt, &z__1, &x[i__ * x_dim1 + 1], &c__1, &bb[ + i__ * bb_dim1 + 2], &c__1, &x[(i__ + 1) * x_dim1 + + 1], ldx); + } + } + +/* store a(i,i1) in RA1 for use in next loop over K */ + + i__4 = i__ - i1 + 1 + i1 * ab_dim1; + ra1.r = ab[i__4].r, ra1.i = ab[i__4].i; + } + +/* Generate and apply vectors of rotations to chase all the */ +/* existing bulges KA positions up toward the top of the band */ + + i__4 = *kb - 1; + for (k = 1; k <= i__4; ++k) { + if (update) { + +/* Determine the rotations which would annihilate the bulge */ +/* which has in theory just been created */ + + if (i__ + k - ka1 > 0 && i__ + k < m) { + +/* generate rotation to annihilate a(i,i+k-ka-1) */ + + zlartg_(&ab[ka1 - k + (i__ + k - *ka) * ab_dim1], &ra1, & + rwork[i__ + k - *ka], &work[i__ + k - *ka], &ra); + +/* create nonzero element a(i+k,i+k-ka-1) outside the */ +/* band and store it in WORK(m-kb+i+k) */ + + i__3 = k + 1 + i__ * bb_dim1; + z__2.r = -bb[i__3].r, z__2.i = -bb[i__3].i; + z__1.r = z__2.r * ra1.r - z__2.i * ra1.i, z__1.i = z__2.r + * ra1.i + z__2.i * ra1.r; + t.r = z__1.r, t.i = z__1.i; + i__3 = m - *kb + i__ + k; + i__1 = i__ + k - *ka; + z__2.r = rwork[i__1] * t.r, z__2.i = rwork[i__1] * t.i; + d_cnjg(&z__4, &work[i__ + k - *ka]); + i__2 = ka1 + (i__ + k - *ka) * ab_dim1; + z__3.r = z__4.r * ab[i__2].r - z__4.i * ab[i__2].i, + z__3.i = z__4.r * ab[i__2].i + z__4.i * ab[i__2] + .r; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; + i__3 = ka1 + (i__ + k - *ka) * ab_dim1; + i__1 = i__ + k - *ka; + z__2.r = work[i__1].r * t.r - work[i__1].i * t.i, z__2.i = + work[i__1].r * t.i + work[i__1].i * t.r; + i__2 = i__ + k - *ka; + i__5 = ka1 + (i__ + k - *ka) * ab_dim1; + z__3.r = rwork[i__2] * ab[i__5].r, z__3.i = rwork[i__2] * + ab[i__5].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + ab[i__3].r = z__1.r, ab[i__3].i = z__1.i; + ra1.r = ra.r, ra1.i = ra.i; + } + } +/* Computing MAX */ + i__3 = 1, i__1 = k + i0 - m + 1; + j2 = i__ + k + 1 - f2cmax(i__3,i__1) * ka1; + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + if (update) { +/* Computing MIN */ + i__3 = j2, i__1 = i__ - (*ka << 1) + k - 1; + j2t = f2cmin(i__3,i__1); + } else { + j2t = j2; + } + nrt = (j2t + *ka - 1) / ka1; + i__3 = j2t; + i__1 = ka1; + for (j = j1; i__1 < 0 ? j >= i__3 : j <= i__3; j += i__1) { + +/* create nonzero element a(j+ka,j-1) outside the band */ +/* and store it in WORK(j) */ + + i__2 = j; + i__5 = j; + i__6 = ka1 + (j - 1) * ab_dim1; + z__1.r = work[i__5].r * ab[i__6].r - work[i__5].i * ab[i__6] + .i, z__1.i = work[i__5].r * ab[i__6].i + work[i__5].i + * ab[i__6].r; + work[i__2].r = z__1.r, work[i__2].i = z__1.i; + i__2 = ka1 + (j - 1) * ab_dim1; + i__5 = j; + i__6 = ka1 + (j - 1) * ab_dim1; + z__1.r = rwork[i__5] * ab[i__6].r, z__1.i = rwork[i__5] * ab[ + i__6].i; + ab[i__2].r = z__1.r, ab[i__2].i = z__1.i; +/* L800: */ + } + +/* generate rotations in 1st set to annihilate elements which */ +/* have been created outside the band */ + + if (nrt > 0) { + zlargv_(&nrt, &ab[ka1 + j1 * ab_dim1], &inca, &work[j1], &ka1, + &rwork[j1], &ka1); + } + if (nr > 0) { + +/* apply rotations in 1st set from the right */ + + i__1 = *ka - 1; + for (l = 1; l <= i__1; ++l) { + zlartv_(&nr, &ab[l + 1 + j1 * ab_dim1], &inca, &ab[l + 2 + + (j1 - 1) * ab_dim1], &inca, &rwork[j1], &work[ + j1], &ka1); +/* L810: */ + } + +/* apply rotations in 1st set from both sides to diagonal */ +/* blocks */ + + zlar2v_(&nr, &ab[j1 * ab_dim1 + 1], &ab[(j1 - 1) * ab_dim1 + + 1], &ab[(j1 - 1) * ab_dim1 + 2], &inca, &rwork[j1], & + work[j1], &ka1); + + zlacgv_(&nr, &work[j1], &ka1); + } + +/* start applying rotations in 1st set from the left */ + + i__1 = *kb - k + 1; + for (l = *ka - 1; l >= i__1; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + zlartv_(&nrt, &ab[ka1 - l + 1 + (j1t - ka1 + l) * ab_dim1] + , &inca, &ab[ka1 - l + (j1t - ka1 + l) * ab_dim1], + &inca, &rwork[j1t], &work[j1t], &ka1); + } +/* L820: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 1st set */ + + i__1 = j2; + i__3 = ka1; + for (j = j1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { + d_cnjg(&z__1, &work[j]); + zrot_(&nx, &x[j * x_dim1 + 1], &c__1, &x[(j - 1) * x_dim1 + + 1], &c__1, &rwork[j], &z__1); +/* L830: */ + } + } +/* L840: */ + } + + if (update) { + if (i2 > 0 && kbt > 0) { + +/* create nonzero element a(i+kbt,i+kbt-ka-1) outside the */ +/* band and store it in WORK(m-kb+i+kbt) */ + + i__4 = m - *kb + i__ + kbt; + i__3 = kbt + 1 + i__ * bb_dim1; + z__2.r = -bb[i__3].r, z__2.i = -bb[i__3].i; + z__1.r = z__2.r * ra1.r - z__2.i * ra1.i, z__1.i = z__2.r * + ra1.i + z__2.i * ra1.r; + work[i__4].r = z__1.r, work[i__4].i = z__1.i; + } + } + + for (k = *kb; k >= 1; --k) { + if (update) { +/* Computing MAX */ + i__4 = 2, i__3 = k + i0 - m; + j2 = i__ + k + 1 - f2cmax(i__4,i__3) * ka1; + } else { +/* Computing MAX */ + i__4 = 1, i__3 = k + i0 - m; + j2 = i__ + k + 1 - f2cmax(i__4,i__3) * ka1; + } + +/* finish applying rotations in 2nd set from the left */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (j2 + *ka + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + zlartv_(&nrt, &ab[ka1 - l + 1 + (j1t + l - 1) * ab_dim1], + &inca, &ab[ka1 - l + (j1t + l - 1) * ab_dim1], & + inca, &rwork[m - *kb + j1t + *ka], &work[m - *kb + + j1t + *ka], &ka1); + } +/* L850: */ + } + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + i__4 = j2; + i__3 = ka1; + for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { + i__1 = m - *kb + j; + i__2 = m - *kb + j + *ka; + work[i__1].r = work[i__2].r, work[i__1].i = work[i__2].i; + rwork[m - *kb + j] = rwork[m - *kb + j + *ka]; +/* L860: */ + } + i__3 = j2; + i__4 = ka1; + for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { + +/* create nonzero element a(j+ka,j-1) outside the band */ +/* and store it in WORK(m-kb+j) */ + + i__1 = m - *kb + j; + i__2 = m - *kb + j; + i__5 = ka1 + (j - 1) * ab_dim1; + z__1.r = work[i__2].r * ab[i__5].r - work[i__2].i * ab[i__5] + .i, z__1.i = work[i__2].r * ab[i__5].i + work[i__2].i + * ab[i__5].r; + work[i__1].r = z__1.r, work[i__1].i = z__1.i; + i__1 = ka1 + (j - 1) * ab_dim1; + i__2 = m - *kb + j; + i__5 = ka1 + (j - 1) * ab_dim1; + z__1.r = rwork[i__2] * ab[i__5].r, z__1.i = rwork[i__2] * ab[ + i__5].i; + ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; +/* L870: */ + } + if (update) { + if (i__ + k > ka1 && k <= kbt) { + i__4 = m - *kb + i__ + k - *ka; + i__3 = m - *kb + i__ + k; + work[i__4].r = work[i__3].r, work[i__4].i = work[i__3].i; + } + } +/* L880: */ + } + + for (k = *kb; k >= 1; --k) { +/* Computing MAX */ + i__4 = 1, i__3 = k + i0 - m; + j2 = i__ + k + 1 - f2cmax(i__4,i__3) * ka1; + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + if (nr > 0) { + +/* generate rotations in 2nd set to annihilate elements */ +/* which have been created outside the band */ + + zlargv_(&nr, &ab[ka1 + j1 * ab_dim1], &inca, &work[m - *kb + + j1], &ka1, &rwork[m - *kb + j1], &ka1); + +/* apply rotations in 2nd set from the right */ + + i__4 = *ka - 1; + for (l = 1; l <= i__4; ++l) { + zlartv_(&nr, &ab[l + 1 + j1 * ab_dim1], &inca, &ab[l + 2 + + (j1 - 1) * ab_dim1], &inca, &rwork[m - *kb + j1] + , &work[m - *kb + j1], &ka1); +/* L890: */ + } + +/* apply rotations in 2nd set from both sides to diagonal */ +/* blocks */ + + zlar2v_(&nr, &ab[j1 * ab_dim1 + 1], &ab[(j1 - 1) * ab_dim1 + + 1], &ab[(j1 - 1) * ab_dim1 + 2], &inca, &rwork[m - * + kb + j1], &work[m - *kb + j1], &ka1); + + zlacgv_(&nr, &work[m - *kb + j1], &ka1); + } + +/* start applying rotations in 2nd set from the left */ + + i__4 = *kb - k + 1; + for (l = *ka - 1; l >= i__4; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + zlartv_(&nrt, &ab[ka1 - l + 1 + (j1t - ka1 + l) * ab_dim1] + , &inca, &ab[ka1 - l + (j1t - ka1 + l) * ab_dim1], + &inca, &rwork[m - *kb + j1t], &work[m - *kb + + j1t], &ka1); + } +/* L900: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 2nd set */ + + i__4 = j2; + i__3 = ka1; + for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { + d_cnjg(&z__1, &work[m - *kb + j]); + zrot_(&nx, &x[j * x_dim1 + 1], &c__1, &x[(j - 1) * x_dim1 + + 1], &c__1, &rwork[m - *kb + j], &z__1); +/* L910: */ + } + } +/* L920: */ + } + + i__3 = *kb - 1; + for (k = 1; k <= i__3; ++k) { +/* Computing MAX */ + i__4 = 1, i__1 = k + i0 - m + 1; + j2 = i__ + k + 1 - f2cmax(i__4,i__1) * ka1; + +/* finish applying rotations in 1st set from the left */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + zlartv_(&nrt, &ab[ka1 - l + 1 + (j1t - ka1 + l) * ab_dim1] + , &inca, &ab[ka1 - l + (j1t - ka1 + l) * ab_dim1], + &inca, &rwork[j1t], &work[j1t], &ka1); + } +/* L930: */ + } +/* L940: */ + } + + if (*kb > 1) { + i__3 = i2 - *ka; + for (j = 2; j <= i__3; ++j) { + rwork[j] = rwork[j + *ka]; + i__4 = j; + i__1 = j + *ka; + work[i__4].r = work[i__1].r, work[i__4].i = work[i__1].i; +/* L950: */ + } + } + + } + + goto L490; + +/* End of ZHBGST */ + +} /* zhbgst_ */ + diff --git a/lapack-netlib/SRC/zhbgv.c b/lapack-netlib/SRC/zhbgv.c new file mode 100644 index 000000000..b8929f7a7 --- /dev/null +++ b/lapack-netlib/SRC/zhbgv.c @@ -0,0 +1,696 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHBGV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHBGV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, */ +/* LDZ, WORK, RWORK, INFO ) */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, N */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 AB( LDAB, * ), BB( LDBB, * ), WORK( * ), */ +/* $ Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHBGV computes all the eigenvalues, and optionally, the eigenvectors */ +/* > of a complex generalized Hermitian-definite banded eigenproblem, of */ +/* > the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian */ +/* > and banded, and B is also positive definite. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangles of A and B are stored; */ +/* > = 'L': Lower triangles of A and B are stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KA */ +/* > \verbatim */ +/* > KA is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KA >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KB */ +/* > \verbatim */ +/* > KB is INTEGER */ +/* > The number of superdiagonals of the matrix B if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KB >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX*16 array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first ka+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for f2cmax(1,j-ka)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+ka). */ +/* > */ +/* > On exit, the contents of AB are destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KA+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] BB */ +/* > \verbatim */ +/* > BB is COMPLEX*16 array, dimension (LDBB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix B, stored in the first kb+1 rows of the array. The */ +/* > j-th column of B is stored in the j-th column of the array BB */ +/* > as follows: */ +/* > if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for f2cmax(1,j-kb)<=i<=j; */ +/* > if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=f2cmin(n,j+kb). */ +/* > */ +/* > On exit, the factor S from the split Cholesky factorization */ +/* > B = S**H*S, as returned by ZPBSTF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDBB */ +/* > \verbatim */ +/* > LDBB is INTEGER */ +/* > The leading dimension of the array BB. LDBB >= KB+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, N) */ +/* > If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */ +/* > eigenvectors, with the i-th column of Z holding the */ +/* > eigenvector associated with W(i). The eigenvectors are */ +/* > normalized so that Z**H*B*Z = I. */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (3*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is: */ +/* > <= N: the algorithm failed to converge: */ +/* > i off-diagonal elements of an intermediate */ +/* > tridiagonal form did not converge to zero; */ +/* > > N: if INFO = N + i, for 1 <= i <= N, then ZPBSTF */ +/* > returned INFO = i: B is not positive definite. */ +/* > The factorization of B could not be completed and */ +/* > no eigenvalues or eigenvectors were computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16OTHEReigen */ + +/* ===================================================================== */ +/* Subroutine */ int zhbgv_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, + integer *ldbb, doublereal *w, doublecomplex *z__, integer *ldz, + doublecomplex *work, doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, bb_dim1, bb_offset, z_dim1, z_offset, i__1; + + /* Local variables */ + integer inde; + char vect[1]; + extern logical lsame_(char *, char *); + integer iinfo; + logical upper, wantz; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dsterf_( + integer *, doublereal *, doublereal *, integer *), zhbtrd_(char *, + char *, integer *, integer *, doublecomplex *, integer *, + doublereal *, doublereal *, doublecomplex *, integer *, + doublecomplex *, integer *); + integer indwrk; + extern /* Subroutine */ int zhbgst_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublereal *, + integer *), zpbstf_(char *, integer *, integer *, + doublecomplex *, integer *, integer *), zsteqr_(char *, + integer *, doublereal *, doublereal *, doublecomplex *, integer *, + doublereal *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + bb_dim1 = *ldbb; + bb_offset = 1 + bb_dim1 * 1; + bb -= bb_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + upper = lsame_(uplo, "U"); + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (upper || lsame_(uplo, "L"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ka < 0) { + *info = -4; + } else if (*kb < 0 || *kb > *ka) { + *info = -5; + } else if (*ldab < *ka + 1) { + *info = -7; + } else if (*ldbb < *kb + 1) { + *info = -9; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHBGV ", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a split Cholesky factorization of B. */ + + zpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem. */ + + inde = 1; + indwrk = inde + *n; + zhbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb, + &z__[z_offset], ldz, &work[1], &rwork[indwrk], &iinfo); + +/* Reduce to tridiagonal form. */ + + if (wantz) { + *(unsigned char *)vect = 'U'; + } else { + *(unsigned char *)vect = 'N'; + } + zhbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &w[1], &rwork[inde], & + z__[z_offset], ldz, &work[1], &iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEQR. */ + + if (! wantz) { + dsterf_(n, &w[1], &rwork[inde], info); + } else { + zsteqr_(jobz, n, &w[1], &rwork[inde], &z__[z_offset], ldz, &rwork[ + indwrk], info); + } + return 0; + +/* End of ZHBGV */ + +} /* zhbgv_ */ + diff --git a/lapack-netlib/SRC/zhbgvd.c b/lapack-netlib/SRC/zhbgvd.c new file mode 100644 index 000000000..22a733daa --- /dev/null +++ b/lapack-netlib/SRC/zhbgvd.c @@ -0,0 +1,829 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHBGVD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHBGVD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, */ +/* Z, LDZ, WORK, LWORK, RWORK, LRWORK, IWORK, */ +/* LIWORK, INFO ) */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LRWORK, */ +/* $ LWORK, N */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 AB( LDAB, * ), BB( LDBB, * ), WORK( * ), */ +/* $ Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHBGVD computes all the eigenvalues, and optionally, the eigenvectors */ +/* > of a complex generalized Hermitian-definite banded eigenproblem, of */ +/* > the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian */ +/* > and banded, and B is also positive definite. If eigenvectors are */ +/* > desired, it uses a divide and conquer algorithm. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangles of A and B are stored; */ +/* > = 'L': Lower triangles of A and B are stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KA */ +/* > \verbatim */ +/* > KA is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KA >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KB */ +/* > \verbatim */ +/* > KB is INTEGER */ +/* > The number of superdiagonals of the matrix B if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KB >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX*16 array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first ka+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for f2cmax(1,j-ka)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+ka). */ +/* > */ +/* > On exit, the contents of AB are destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KA+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] BB */ +/* > \verbatim */ +/* > BB is COMPLEX*16 array, dimension (LDBB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix B, stored in the first kb+1 rows of the array. The */ +/* > j-th column of B is stored in the j-th column of the array BB */ +/* > as follows: */ +/* > if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for f2cmax(1,j-kb)<=i<=j; */ +/* > if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=f2cmin(n,j+kb). */ +/* > */ +/* > On exit, the factor S from the split Cholesky factorization */ +/* > B = S**H*S, as returned by ZPBSTF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDBB */ +/* > \verbatim */ +/* > LDBB is INTEGER */ +/* > The leading dimension of the array BB. LDBB >= KB+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, N) */ +/* > If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */ +/* > eigenvectors, with the i-th column of Z holding the */ +/* > eigenvector associated with W(i). The eigenvectors are */ +/* > normalized so that Z**H*B*Z = I. */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO=0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If N <= 1, LWORK >= 1. */ +/* > If JOBZ = 'N' and N > 1, LWORK >= N. */ +/* > If JOBZ = 'V' and N > 1, LWORK >= 2*N**2. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) */ +/* > On exit, if INFO=0, RWORK(1) returns the optimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The dimension of array RWORK. */ +/* > If N <= 1, LRWORK >= 1. */ +/* > If JOBZ = 'N' and N > 1, LRWORK >= N. */ +/* > If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO=0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of array IWORK. */ +/* > If JOBZ = 'N' or N <= 1, LIWORK >= 1. */ +/* > If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is: */ +/* > <= N: the algorithm failed to converge: */ +/* > i off-diagonal elements of an intermediate */ +/* > tridiagonal form did not converge to zero; */ +/* > > N: if INFO = N + i, for 1 <= i <= N, then ZPBSTF */ +/* > returned INFO = i: B is not positive definite. */ +/* > The factorization of B could not be completed and */ +/* > no eigenvalues or eigenvectors were computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16OTHEReigen */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ + +/* ===================================================================== */ +/* Subroutine */ int zhbgvd_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, + integer *ldbb, doublereal *w, doublecomplex *z__, integer *ldz, + doublecomplex *work, integer *lwork, doublereal *rwork, integer * + lrwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, bb_dim1, bb_offset, z_dim1, z_offset, i__1; + + /* Local variables */ + integer inde; + char vect[1]; + integer llwk2; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + integer lwmin; + logical upper; + integer llrwk; + logical wantz; + integer indwk2; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dsterf_( + integer *, doublereal *, doublereal *, integer *), zstedc_(char *, + integer *, doublereal *, doublereal *, doublecomplex *, integer * + , doublecomplex *, integer *, doublereal *, integer *, integer *, + integer *, integer *), zhbtrd_(char *, char *, integer *, + integer *, doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, integer *, doublecomplex *, integer *); + integer indwrk, liwmin; + extern /* Subroutine */ int zhbgst_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublereal *, + integer *), zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + integer lrwmin; + extern /* Subroutine */ int zpbstf_(char *, integer *, integer *, + doublecomplex *, integer *, integer *); + logical lquery; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + bb_dim1 = *ldbb; + bb_offset = 1 + bb_dim1 * 1; + bb -= bb_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + *info = 0; + if (*n <= 1) { + lwmin = *n + 1; + lrwmin = *n + 1; + liwmin = 1; + } else if (wantz) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = i__1 * i__1 << 1; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { + lwmin = *n; + lrwmin = *n; + liwmin = 1; + } + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (upper || lsame_(uplo, "L"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ka < 0) { + *info = -4; + } else if (*kb < 0 || *kb > *ka) { + *info = -5; + } else if (*ldab < *ka + 1) { + *info = -7; + } else if (*ldbb < *kb + 1) { + *info = -9; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -12; + } + + if (*info == 0) { + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -14; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -16; + } else if (*liwork < liwmin && ! lquery) { + *info = -18; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHBGVD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a split Cholesky factorization of B. */ + + zpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem. */ + + inde = 1; + indwrk = inde + *n; + indwk2 = *n * *n + 1; + llwk2 = *lwork - indwk2 + 2; + llrwk = *lrwork - indwrk + 2; + zhbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb, + &z__[z_offset], ldz, &work[1], &rwork[1], &iinfo); + +/* Reduce Hermitian band matrix to tridiagonal form. */ + + if (wantz) { + *(unsigned char *)vect = 'U'; + } else { + *(unsigned char *)vect = 'N'; + } + zhbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &w[1], &rwork[inde], & + z__[z_offset], ldz, &work[1], &iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEDC. */ + + if (! wantz) { + dsterf_(n, &w[1], &rwork[inde], info); + } else { + zstedc_("I", n, &w[1], &rwork[inde], &work[1], n, &work[indwk2], & + llwk2, &rwork[indwrk], &llrwk, &iwork[1], liwork, info); + zgemm_("N", "N", n, n, n, &c_b1, &z__[z_offset], ldz, &work[1], n, & + c_b2, &work[indwk2], n); + zlacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz); + } + + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + return 0; + +/* End of ZHBGVD */ + +} /* zhbgvd_ */ + diff --git a/lapack-netlib/SRC/zhbgvx.c b/lapack-netlib/SRC/zhbgvx.c new file mode 100644 index 000000000..3be1b0b9e --- /dev/null +++ b/lapack-netlib/SRC/zhbgvx.c @@ -0,0 +1,976 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHBGVX */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHBGVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB, */ +/* LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, */ +/* LDZ, WORK, RWORK, IWORK, IFAIL, INFO ) */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M, */ +/* $ N */ +/* DOUBLE PRECISION ABSTOL, VL, VU */ +/* INTEGER IFAIL( * ), IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ), */ +/* $ WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHBGVX computes all the eigenvalues, and optionally, the eigenvectors */ +/* > of a complex generalized Hermitian-definite banded eigenproblem, of */ +/* > the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian */ +/* > and banded, and B is also positive definite. Eigenvalues and */ +/* > eigenvectors can be selected by specifying either all eigenvalues, */ +/* > a range of values or a range of indices for the desired eigenvalues. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all eigenvalues will be found; */ +/* > = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* > will be found; */ +/* > = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangles of A and B are stored; */ +/* > = 'L': Lower triangles of A and B are stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KA */ +/* > \verbatim */ +/* > KA is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KA >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KB */ +/* > \verbatim */ +/* > KB is INTEGER */ +/* > The number of superdiagonals of the matrix B if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KB >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX*16 array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first ka+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for f2cmax(1,j-ka)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+ka). */ +/* > */ +/* > On exit, the contents of AB are destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KA+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] BB */ +/* > \verbatim */ +/* > BB is COMPLEX*16 array, dimension (LDBB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix B, stored in the first kb+1 rows of the array. The */ +/* > j-th column of B is stored in the j-th column of the array BB */ +/* > as follows: */ +/* > if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for f2cmax(1,j-kb)<=i<=j; */ +/* > if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=f2cmin(n,j+kb). */ +/* > */ +/* > On exit, the factor S from the split Cholesky factorization */ +/* > B = S**H*S, as returned by ZPBSTF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDBB */ +/* > \verbatim */ +/* > LDBB is INTEGER */ +/* > The leading dimension of the array BB. LDBB >= KB+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX*16 array, dimension (LDQ, N) */ +/* > If JOBZ = 'V', the n-by-n matrix used in the reduction of */ +/* > A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x, */ +/* > and consequently C to tridiagonal form. */ +/* > If JOBZ = 'N', the array Q is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. If JOBZ = 'N', */ +/* > LDQ >= 1. If JOBZ = 'V', LDQ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is DOUBLE PRECISION */ +/* > */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is DOUBLE PRECISION */ +/* > */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > */ +/* > If RANGE='I', the index of the */ +/* > smallest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > */ +/* > If RANGE='I', the index of the */ +/* > largest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ABSTOL */ +/* > \verbatim */ +/* > ABSTOL is DOUBLE PRECISION */ +/* > The absolute error tolerance for the eigenvalues. */ +/* > An approximate eigenvalue is accepted as converged */ +/* > when it is determined to lie in an interval [a,b] */ +/* > of width less than or equal to */ +/* > */ +/* > ABSTOL + EPS * f2cmax( |a|,|b| ) , */ +/* > */ +/* > where EPS is the machine precision. If ABSTOL is less than */ +/* > or equal to zero, then EPS*|T| will be used in its place, */ +/* > where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* > by reducing AP to tridiagonal form. */ +/* > */ +/* > Eigenvalues will be computed most accurately when ABSTOL is */ +/* > set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ +/* > If this routine returns with INFO>0, indicating that some */ +/* > eigenvectors did not converge, try setting ABSTOL to */ +/* > 2*DLAMCH('S'). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The total number of eigenvalues found. 0 <= M <= N. */ +/* > If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, N) */ +/* > If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */ +/* > eigenvectors, with the i-th column of Z holding the */ +/* > eigenvector associated with W(i). The eigenvectors are */ +/* > normalized so that Z**H*B*Z = I. */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (7*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (5*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IFAIL */ +/* > \verbatim */ +/* > IFAIL is INTEGER array, dimension (N) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* > IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* > indices of the eigenvectors that failed to converge. */ +/* > If JOBZ = 'N', then IFAIL is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is: */ +/* > <= N: then i eigenvectors failed to converge. Their */ +/* > indices are stored in array IFAIL. */ +/* > > N: if INFO = N + i, for 1 <= i <= N, then ZPBSTF */ +/* > returned INFO = i: B is not positive definite. */ +/* > The factorization of B could not be completed and */ +/* > no eigenvalues or eigenvectors were computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16OTHEReigen */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ + +/* ===================================================================== */ +/* Subroutine */ int zhbgvx_(char *jobz, char *range, char *uplo, integer *n, + integer *ka, integer *kb, doublecomplex *ab, integer *ldab, + doublecomplex *bb, integer *ldbb, doublecomplex *q, integer *ldq, + doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal * + abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, + doublecomplex *work, doublereal *rwork, integer *iwork, integer * + ifail, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, bb_dim1, bb_offset, q_dim1, q_offset, z_dim1, + z_offset, i__1, i__2; + + /* Local variables */ + integer indd, inde; + char vect[1]; + logical test; + integer itmp1, i__, j, indee; + extern logical lsame_(char *, char *); + integer iinfo; + char order[1]; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + logical upper, wantz; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zswap_(integer *, doublecomplex *, + integer *, doublecomplex *, integer *); + integer jj; + logical alleig, indeig; + integer indibl; + logical valeig; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer indiwk, indisp; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), dstebz_(char *, char *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *), + zhbtrd_(char *, char *, integer *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublecomplex *, integer *, + doublecomplex *, integer *); + integer indrwk, indwrk; + extern /* Subroutine */ int zhbgst_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublereal *, + integer *), zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + integer nsplit; + extern /* Subroutine */ int zpbstf_(char *, integer *, integer *, + doublecomplex *, integer *, integer *), zstein_(integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, doublecomplex *, integer *, doublereal *, integer *, + integer *, integer *), zsteqr_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *); + doublereal tmp1; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + bb_dim1 = *ldbb; + bb_offset = 1 + bb_dim1 * 1; + bb -= bb_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + --ifail; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + upper = lsame_(uplo, "U"); + alleig = lsame_(range, "A"); + valeig = lsame_(range, "V"); + indeig = lsame_(range, "I"); + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (upper || lsame_(uplo, "L"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ka < 0) { + *info = -5; + } else if (*kb < 0 || *kb > *ka) { + *info = -6; + } else if (*ldab < *ka + 1) { + *info = -8; + } else if (*ldbb < *kb + 1) { + *info = -10; + } else if (*ldq < 1 || wantz && *ldq < *n) { + *info = -12; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -14; + } + } else if (indeig) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -15; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -16; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -21; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHBGVX", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + +/* Form a split Cholesky factorization of B. */ + + zpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem. */ + + zhbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb, + &q[q_offset], ldq, &work[1], &rwork[1], &iinfo); + +/* Solve the standard eigenvalue problem. */ +/* Reduce Hermitian band matrix to tridiagonal form. */ + + indd = 1; + inde = indd + *n; + indrwk = inde + *n; + indwrk = 1; + if (wantz) { + *(unsigned char *)vect = 'U'; + } else { + *(unsigned char *)vect = 'N'; + } + zhbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &rwork[indd], &rwork[ + inde], &q[q_offset], ldq, &work[indwrk], &iinfo); + +/* If all eigenvalues are desired and ABSTOL is less than or equal */ +/* to zero, then call DSTERF or ZSTEQR. If this fails for some */ +/* eigenvalue, then try DSTEBZ. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && *abstol <= 0.) { + dcopy_(n, &rwork[indd], &c__1, &w[1], &c__1); + indee = indrwk + (*n << 1); + i__1 = *n - 1; + dcopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + if (! wantz) { + dsterf_(n, &w[1], &rwork[indee], info); + } else { + zlacpy_("A", n, n, &q[q_offset], ldq, &z__[z_offset], ldz); + zsteqr_(jobz, n, &w[1], &rwork[indee], &z__[z_offset], ldz, & + rwork[indrwk], info); + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ifail[i__] = 0; +/* L10: */ + } + } + } + if (*info == 0) { + *m = *n; + goto L30; + } + *info = 0; + } + +/* Otherwise, call DSTEBZ and, if eigenvectors are desired, */ +/* call ZSTEIN. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + indibl = 1; + indisp = indibl + *n; + indiwk = indisp + *n; + dstebz_(range, order, n, vl, vu, il, iu, abstol, &rwork[indd], &rwork[ + inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[ + indrwk], &iwork[indiwk], info); + + if (wantz) { + zstein_(n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], & + iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[ + indiwk], &ifail[1], info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by ZSTEIN. */ + + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + zcopy_(n, &z__[j * z_dim1 + 1], &c__1, &work[1], &c__1); + zgemv_("N", n, n, &c_b2, &q[q_offset], ldq, &work[1], &c__1, & + c_b1, &z__[j * z_dim1 + 1], &c__1); +/* L20: */ + } + } + +L30: + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L40: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + if (*info != 0) { + itmp1 = ifail[i__]; + ifail[i__] = ifail[j]; + ifail[j] = itmp1; + } + } +/* L50: */ + } + } + + return 0; + +/* End of ZHBGVX */ + +} /* zhbgvx_ */ + diff --git a/lapack-netlib/SRC/zhbtrd.c b/lapack-netlib/SRC/zhbtrd.c new file mode 100644 index 000000000..3af317394 --- /dev/null +++ b/lapack-netlib/SRC/zhbtrd.c @@ -0,0 +1,1255 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHBTRD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHBTRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ, */ +/* WORK, INFO ) */ + +/* CHARACTER UPLO, VECT */ +/* INTEGER INFO, KD, LDAB, LDQ, N */ +/* DOUBLE PRECISION D( * ), E( * ) */ +/* COMPLEX*16 AB( LDAB, * ), Q( LDQ, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHBTRD reduces a complex Hermitian band matrix A to real symmetric */ +/* > tridiagonal form T by a unitary similarity transformation: */ +/* > Q**H * A * Q = T. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] VECT */ +/* > \verbatim */ +/* > VECT is CHARACTER*1 */ +/* > = 'N': do not form Q; */ +/* > = 'V': form Q; */ +/* > = 'U': update a matrix X, by forming X*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX*16 array, dimension (LDAB,N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > On exit, the diagonal elements of AB are overwritten by the */ +/* > diagonal elements of the tridiagonal matrix T; if KD > 0, the */ +/* > elements on the first superdiagonal (if UPLO = 'U') or the */ +/* > first subdiagonal (if UPLO = 'L') are overwritten by the */ +/* > off-diagonal elements of T; the rest of AB is overwritten by */ +/* > values generated during the reduction. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > The diagonal elements of the tridiagonal matrix T. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > The off-diagonal elements of the tridiagonal matrix T: */ +/* > E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX*16 array, dimension (LDQ,N) */ +/* > On entry, if VECT = 'U', then Q must contain an N-by-N */ +/* > matrix X; if VECT = 'N' or 'V', then Q need not be set. */ +/* > */ +/* > On exit: */ +/* > if VECT = 'V', Q contains the N-by-N unitary matrix Q; */ +/* > if VECT = 'U', Q contains the product X*Q; */ +/* > if VECT = 'N', the array Q is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. */ +/* > LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16OTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Modified by Linda Kaufman, Bell Labs. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zhbtrd_(char *vect, char *uplo, integer *n, integer *kd, + doublecomplex *ab, integer *ldab, doublereal *d__, doublereal *e, + doublecomplex *q, integer *ldq, doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, q_dim1, q_offset, i__1, i__2, i__3, i__4, + i__5, i__6; + doublereal d__1; + doublecomplex z__1; + + /* Local variables */ + integer inca, jend, lend, jinc; + doublereal abst; + integer incx, last; + doublecomplex temp; + extern /* Subroutine */ int zrot_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *); + integer j1end, j1inc, i__, j, k, l; + doublecomplex t; + integer iqend; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *); + logical initq, wantq, upper; + integer i2, j1, j2; + extern /* Subroutine */ int zlar2v_(integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *, doublereal *, + doublecomplex *, integer *); + integer nq, nr, iqaend; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zlacgv_( + integer *, doublecomplex *, integer *); + integer kd1; + extern /* Subroutine */ int zlaset_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *), zlartg_(doublecomplex *, doublecomplex *, doublereal *, + doublecomplex *, doublecomplex *), zlargv_(integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, integer *), zlartv_(integer *, doublecomplex *, + integer *, doublecomplex *, integer *, doublereal *, + doublecomplex *, integer *); + integer ibl, iqb, kdn, jin, nrt, kdm1; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + --work; + + /* Function Body */ + initq = lsame_(vect, "V"); + wantq = initq || lsame_(vect, "U"); + upper = lsame_(uplo, "U"); + kd1 = *kd + 1; + kdm1 = *kd - 1; + incx = *ldab - 1; + iqend = 1; + + *info = 0; + if (! wantq && ! lsame_(vect, "N")) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kd < 0) { + *info = -4; + } else if (*ldab < kd1) { + *info = -6; + } else if (*ldq < f2cmax(1,*n) && wantq) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHBTRD", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Initialize Q to the unit matrix, if needed */ + + if (initq) { + zlaset_("Full", n, n, &c_b1, &c_b2, &q[q_offset], ldq); + } + +/* Wherever possible, plane rotations are generated and applied in */ +/* vector operations of length NR over the index set J1:J2:KD1. */ + +/* The real cosines and complex sines of the plane rotations are */ +/* stored in the arrays D and WORK. */ + + inca = kd1 * *ldab; +/* Computing MIN */ + i__1 = *n - 1; + kdn = f2cmin(i__1,*kd); + if (upper) { + + if (*kd > 1) { + +/* Reduce to complex Hermitian tridiagonal form, working with */ +/* the upper triangle */ + + nr = 0; + j1 = kdn + 2; + j2 = 1; + + i__1 = kd1 + ab_dim1; + i__2 = kd1 + ab_dim1; + d__1 = ab[i__2].r; + ab[i__1].r = d__1, ab[i__1].i = 0.; + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Reduce i-th row of matrix to tridiagonal form */ + + for (k = kdn + 1; k >= 2; --k) { + j1 += kdn; + j2 += kdn; + + if (nr > 0) { + +/* generate plane rotations to annihilate nonzero */ +/* elements which have been created outside the band */ + + zlargv_(&nr, &ab[(j1 - 1) * ab_dim1 + 1], &inca, & + work[j1], &kd1, &d__[j1], &kd1); + +/* apply rotations from the right */ + + +/* Dependent on the the number of diagonals either */ +/* ZLARTV or ZROT is used */ + + if (nr >= (*kd << 1) - 1) { + i__2 = *kd - 1; + for (l = 1; l <= i__2; ++l) { + zlartv_(&nr, &ab[l + 1 + (j1 - 1) * ab_dim1], + &inca, &ab[l + j1 * ab_dim1], &inca, & + d__[j1], &work[j1], &kd1); +/* L10: */ + } + + } else { + jend = j1 + (nr - 1) * kd1; + i__2 = jend; + i__3 = kd1; + for (jinc = j1; i__3 < 0 ? jinc >= i__2 : jinc <= + i__2; jinc += i__3) { + zrot_(&kdm1, &ab[(jinc - 1) * ab_dim1 + 2], & + c__1, &ab[jinc * ab_dim1 + 1], &c__1, + &d__[jinc], &work[jinc]); +/* L20: */ + } + } + } + + + if (k > 2) { + if (k <= *n - i__ + 1) { + +/* generate plane rotation to annihilate a(i,i+k-1) */ +/* within the band */ + + zlartg_(&ab[*kd - k + 3 + (i__ + k - 2) * ab_dim1] + , &ab[*kd - k + 2 + (i__ + k - 1) * + ab_dim1], &d__[i__ + k - 1], &work[i__ + + k - 1], &temp); + i__3 = *kd - k + 3 + (i__ + k - 2) * ab_dim1; + ab[i__3].r = temp.r, ab[i__3].i = temp.i; + +/* apply rotation from the right */ + + i__3 = k - 3; + zrot_(&i__3, &ab[*kd - k + 4 + (i__ + k - 2) * + ab_dim1], &c__1, &ab[*kd - k + 3 + (i__ + + k - 1) * ab_dim1], &c__1, &d__[i__ + k - + 1], &work[i__ + k - 1]); + } + ++nr; + j1 = j1 - kdn - 1; + } + +/* apply plane rotations from both sides to diagonal */ +/* blocks */ + + if (nr > 0) { + zlar2v_(&nr, &ab[kd1 + (j1 - 1) * ab_dim1], &ab[kd1 + + j1 * ab_dim1], &ab[*kd + j1 * ab_dim1], &inca, + &d__[j1], &work[j1], &kd1); + } + +/* apply plane rotations from the left */ + + if (nr > 0) { + zlacgv_(&nr, &work[j1], &kd1); + if ((*kd << 1) - 1 < nr) { + +/* Dependent on the the number of diagonals either */ +/* ZLARTV or ZROT is used */ + + i__3 = *kd - 1; + for (l = 1; l <= i__3; ++l) { + if (j2 + l > *n) { + nrt = nr - 1; + } else { + nrt = nr; + } + if (nrt > 0) { + zlartv_(&nrt, &ab[*kd - l + (j1 + l) * + ab_dim1], &inca, &ab[*kd - l + 1 + + (j1 + l) * ab_dim1], &inca, & + d__[j1], &work[j1], &kd1); + } +/* L30: */ + } + } else { + j1end = j1 + kd1 * (nr - 2); + if (j1end >= j1) { + i__3 = j1end; + i__2 = kd1; + for (jin = j1; i__2 < 0 ? jin >= i__3 : jin <= + i__3; jin += i__2) { + i__4 = *kd - 1; + zrot_(&i__4, &ab[*kd - 1 + (jin + 1) * + ab_dim1], &incx, &ab[*kd + (jin + + 1) * ab_dim1], &incx, &d__[jin], & + work[jin]); +/* L40: */ + } + } +/* Computing MIN */ + i__2 = kdm1, i__3 = *n - j2; + lend = f2cmin(i__2,i__3); + last = j1end + kd1; + if (lend > 0) { + zrot_(&lend, &ab[*kd - 1 + (last + 1) * + ab_dim1], &incx, &ab[*kd + (last + 1) + * ab_dim1], &incx, &d__[last], &work[ + last]); + } + } + } + + if (wantq) { + +/* accumulate product of plane rotations in Q */ + + if (initq) { + +/* take advantage of the fact that Q was */ +/* initially the Identity matrix */ + + iqend = f2cmax(iqend,j2); +/* Computing MAX */ + i__2 = 0, i__3 = k - 3; + i2 = f2cmax(i__2,i__3); + iqaend = i__ * *kd + 1; + if (k == 2) { + iqaend += *kd; + } + iqaend = f2cmin(iqaend,iqend); + i__2 = j2; + i__3 = kd1; + for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j + += i__3) { + ibl = i__ - i2 / kdm1; + ++i2; +/* Computing MAX */ + i__4 = 1, i__5 = j - ibl; + iqb = f2cmax(i__4,i__5); + nq = iqaend + 1 - iqb; +/* Computing MIN */ + i__4 = iqaend + *kd; + iqaend = f2cmin(i__4,iqend); + d_cnjg(&z__1, &work[j]); + zrot_(&nq, &q[iqb + (j - 1) * q_dim1], &c__1, + &q[iqb + j * q_dim1], &c__1, &d__[j], + &z__1); +/* L50: */ + } + } else { + + i__3 = j2; + i__2 = kd1; + for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j + += i__2) { + d_cnjg(&z__1, &work[j]); + zrot_(n, &q[(j - 1) * q_dim1 + 1], &c__1, &q[ + j * q_dim1 + 1], &c__1, &d__[j], & + z__1); +/* L60: */ + } + } + + } + + if (j2 + kdn > *n) { + +/* adjust J2 to keep within the bounds of the matrix */ + + --nr; + j2 = j2 - kdn - 1; + } + + i__2 = j2; + i__3 = kd1; + for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j += i__3) + { + +/* create nonzero element a(j-1,j+kd) outside the band */ +/* and store it in WORK */ + + i__4 = j + *kd; + i__5 = j; + i__6 = (j + *kd) * ab_dim1 + 1; + z__1.r = work[i__5].r * ab[i__6].r - work[i__5].i * + ab[i__6].i, z__1.i = work[i__5].r * ab[i__6] + .i + work[i__5].i * ab[i__6].r; + work[i__4].r = z__1.r, work[i__4].i = z__1.i; + i__4 = (j + *kd) * ab_dim1 + 1; + i__5 = j; + i__6 = (j + *kd) * ab_dim1 + 1; + z__1.r = d__[i__5] * ab[i__6].r, z__1.i = d__[i__5] * + ab[i__6].i; + ab[i__4].r = z__1.r, ab[i__4].i = z__1.i; +/* L70: */ + } +/* L80: */ + } +/* L90: */ + } + } + + if (*kd > 0) { + +/* make off-diagonal elements real and copy them to E */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__3 = *kd + (i__ + 1) * ab_dim1; + t.r = ab[i__3].r, t.i = ab[i__3].i; + abst = z_abs(&t); + i__3 = *kd + (i__ + 1) * ab_dim1; + ab[i__3].r = abst, ab[i__3].i = 0.; + e[i__] = abst; + if (abst != 0.) { + z__1.r = t.r / abst, z__1.i = t.i / abst; + t.r = z__1.r, t.i = z__1.i; + } else { + t.r = 1., t.i = 0.; + } + if (i__ < *n - 1) { + i__3 = *kd + (i__ + 2) * ab_dim1; + i__2 = *kd + (i__ + 2) * ab_dim1; + z__1.r = ab[i__2].r * t.r - ab[i__2].i * t.i, z__1.i = ab[ + i__2].r * t.i + ab[i__2].i * t.r; + ab[i__3].r = z__1.r, ab[i__3].i = z__1.i; + } + if (wantq) { + d_cnjg(&z__1, &t); + zscal_(n, &z__1, &q[(i__ + 1) * q_dim1 + 1], &c__1); + } +/* L100: */ + } + } else { + +/* set E to zero if original matrix was diagonal */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + e[i__] = 0.; +/* L110: */ + } + } + +/* copy diagonal elements to D */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__3 = i__; + i__2 = kd1 + i__ * ab_dim1; + d__[i__3] = ab[i__2].r; +/* L120: */ + } + + } else { + + if (*kd > 1) { + +/* Reduce to complex Hermitian tridiagonal form, working with */ +/* the lower triangle */ + + nr = 0; + j1 = kdn + 2; + j2 = 1; + + i__1 = ab_dim1 + 1; + i__3 = ab_dim1 + 1; + d__1 = ab[i__3].r; + ab[i__1].r = d__1, ab[i__1].i = 0.; + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Reduce i-th column of matrix to tridiagonal form */ + + for (k = kdn + 1; k >= 2; --k) { + j1 += kdn; + j2 += kdn; + + if (nr > 0) { + +/* generate plane rotations to annihilate nonzero */ +/* elements which have been created outside the band */ + + zlargv_(&nr, &ab[kd1 + (j1 - kd1) * ab_dim1], &inca, & + work[j1], &kd1, &d__[j1], &kd1); + +/* apply plane rotations from one side */ + + +/* Dependent on the the number of diagonals either */ +/* ZLARTV or ZROT is used */ + + if (nr > (*kd << 1) - 1) { + i__3 = *kd - 1; + for (l = 1; l <= i__3; ++l) { + zlartv_(&nr, &ab[kd1 - l + (j1 - kd1 + l) * + ab_dim1], &inca, &ab[kd1 - l + 1 + ( + j1 - kd1 + l) * ab_dim1], &inca, &d__[ + j1], &work[j1], &kd1); +/* L130: */ + } + } else { + jend = j1 + kd1 * (nr - 1); + i__3 = jend; + i__2 = kd1; + for (jinc = j1; i__2 < 0 ? jinc >= i__3 : jinc <= + i__3; jinc += i__2) { + zrot_(&kdm1, &ab[*kd + (jinc - *kd) * ab_dim1] + , &incx, &ab[kd1 + (jinc - *kd) * + ab_dim1], &incx, &d__[jinc], &work[ + jinc]); +/* L140: */ + } + } + + } + + if (k > 2) { + if (k <= *n - i__ + 1) { + +/* generate plane rotation to annihilate a(i+k-1,i) */ +/* within the band */ + + zlartg_(&ab[k - 1 + i__ * ab_dim1], &ab[k + i__ * + ab_dim1], &d__[i__ + k - 1], &work[i__ + + k - 1], &temp); + i__2 = k - 1 + i__ * ab_dim1; + ab[i__2].r = temp.r, ab[i__2].i = temp.i; + +/* apply rotation from the left */ + + i__2 = k - 3; + i__3 = *ldab - 1; + i__4 = *ldab - 1; + zrot_(&i__2, &ab[k - 2 + (i__ + 1) * ab_dim1], & + i__3, &ab[k - 1 + (i__ + 1) * ab_dim1], & + i__4, &d__[i__ + k - 1], &work[i__ + k - + 1]); + } + ++nr; + j1 = j1 - kdn - 1; + } + +/* apply plane rotations from both sides to diagonal */ +/* blocks */ + + if (nr > 0) { + zlar2v_(&nr, &ab[(j1 - 1) * ab_dim1 + 1], &ab[j1 * + ab_dim1 + 1], &ab[(j1 - 1) * ab_dim1 + 2], & + inca, &d__[j1], &work[j1], &kd1); + } + +/* apply plane rotations from the right */ + + +/* Dependent on the the number of diagonals either */ +/* ZLARTV or ZROT is used */ + + if (nr > 0) { + zlacgv_(&nr, &work[j1], &kd1); + if (nr > (*kd << 1) - 1) { + i__2 = *kd - 1; + for (l = 1; l <= i__2; ++l) { + if (j2 + l > *n) { + nrt = nr - 1; + } else { + nrt = nr; + } + if (nrt > 0) { + zlartv_(&nrt, &ab[l + 2 + (j1 - 1) * + ab_dim1], &inca, &ab[l + 1 + j1 * + ab_dim1], &inca, &d__[j1], &work[ + j1], &kd1); + } +/* L150: */ + } + } else { + j1end = j1 + kd1 * (nr - 2); + if (j1end >= j1) { + i__2 = j1end; + i__3 = kd1; + for (j1inc = j1; i__3 < 0 ? j1inc >= i__2 : + j1inc <= i__2; j1inc += i__3) { + zrot_(&kdm1, &ab[(j1inc - 1) * ab_dim1 + + 3], &c__1, &ab[j1inc * ab_dim1 + + 2], &c__1, &d__[j1inc], &work[ + j1inc]); +/* L160: */ + } + } +/* Computing MIN */ + i__3 = kdm1, i__2 = *n - j2; + lend = f2cmin(i__3,i__2); + last = j1end + kd1; + if (lend > 0) { + zrot_(&lend, &ab[(last - 1) * ab_dim1 + 3], & + c__1, &ab[last * ab_dim1 + 2], &c__1, + &d__[last], &work[last]); + } + } + } + + + + if (wantq) { + +/* accumulate product of plane rotations in Q */ + + if (initq) { + +/* take advantage of the fact that Q was */ +/* initially the Identity matrix */ + + iqend = f2cmax(iqend,j2); +/* Computing MAX */ + i__3 = 0, i__2 = k - 3; + i2 = f2cmax(i__3,i__2); + iqaend = i__ * *kd + 1; + if (k == 2) { + iqaend += *kd; + } + iqaend = f2cmin(iqaend,iqend); + i__3 = j2; + i__2 = kd1; + for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j + += i__2) { + ibl = i__ - i2 / kdm1; + ++i2; +/* Computing MAX */ + i__4 = 1, i__5 = j - ibl; + iqb = f2cmax(i__4,i__5); + nq = iqaend + 1 - iqb; +/* Computing MIN */ + i__4 = iqaend + *kd; + iqaend = f2cmin(i__4,iqend); + zrot_(&nq, &q[iqb + (j - 1) * q_dim1], &c__1, + &q[iqb + j * q_dim1], &c__1, &d__[j], + &work[j]); +/* L170: */ + } + } else { + + i__2 = j2; + i__3 = kd1; + for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j + += i__3) { + zrot_(n, &q[(j - 1) * q_dim1 + 1], &c__1, &q[ + j * q_dim1 + 1], &c__1, &d__[j], & + work[j]); +/* L180: */ + } + } + } + + if (j2 + kdn > *n) { + +/* adjust J2 to keep within the bounds of the matrix */ + + --nr; + j2 = j2 - kdn - 1; + } + + i__3 = j2; + i__2 = kd1; + for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j += i__2) + { + +/* create nonzero element a(j+kd,j-1) outside the */ +/* band and store it in WORK */ + + i__4 = j + *kd; + i__5 = j; + i__6 = kd1 + j * ab_dim1; + z__1.r = work[i__5].r * ab[i__6].r - work[i__5].i * + ab[i__6].i, z__1.i = work[i__5].r * ab[i__6] + .i + work[i__5].i * ab[i__6].r; + work[i__4].r = z__1.r, work[i__4].i = z__1.i; + i__4 = kd1 + j * ab_dim1; + i__5 = j; + i__6 = kd1 + j * ab_dim1; + z__1.r = d__[i__5] * ab[i__6].r, z__1.i = d__[i__5] * + ab[i__6].i; + ab[i__4].r = z__1.r, ab[i__4].i = z__1.i; +/* L190: */ + } +/* L200: */ + } +/* L210: */ + } + } + + if (*kd > 0) { + +/* make off-diagonal elements real and copy them to E */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ * ab_dim1 + 2; + t.r = ab[i__2].r, t.i = ab[i__2].i; + abst = z_abs(&t); + i__2 = i__ * ab_dim1 + 2; + ab[i__2].r = abst, ab[i__2].i = 0.; + e[i__] = abst; + if (abst != 0.) { + z__1.r = t.r / abst, z__1.i = t.i / abst; + t.r = z__1.r, t.i = z__1.i; + } else { + t.r = 1., t.i = 0.; + } + if (i__ < *n - 1) { + i__2 = (i__ + 1) * ab_dim1 + 2; + i__3 = (i__ + 1) * ab_dim1 + 2; + z__1.r = ab[i__3].r * t.r - ab[i__3].i * t.i, z__1.i = ab[ + i__3].r * t.i + ab[i__3].i * t.r; + ab[i__2].r = z__1.r, ab[i__2].i = z__1.i; + } + if (wantq) { + zscal_(n, &t, &q[(i__ + 1) * q_dim1 + 1], &c__1); + } +/* L220: */ + } + } else { + +/* set E to zero if original matrix was diagonal */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + e[i__] = 0.; +/* L230: */ + } + } + +/* copy diagonal elements to D */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__ * ab_dim1 + 1; + d__[i__2] = ab[i__3].r; +/* L240: */ + } + } + + return 0; + +/* End of ZHBTRD */ + +} /* zhbtrd_ */ + diff --git a/lapack-netlib/SRC/zhecon.c b/lapack-netlib/SRC/zhecon.c new file mode 100644 index 000000000..e2ba6c6d6 --- /dev/null +++ b/lapack-netlib/SRC/zhecon.c @@ -0,0 +1,635 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHECON */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHECON + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHECON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, */ +/* INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* DOUBLE PRECISION ANORM, RCOND */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHECON estimates the reciprocal of the condition number of a complex */ +/* > Hermitian matrix A using the factorization A = U*D*U**H or */ +/* > A = L*D*L**H computed by ZHETRF. */ +/* > */ +/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the details of the factorization are stored */ +/* > as an upper or lower triangular matrix. */ +/* > = 'U': Upper triangular, form is A = U*D*U**H; */ +/* > = 'L': Lower triangular, form is A = L*D*L**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > The block diagonal matrix D and the multipliers used to */ +/* > obtain the factor U or L as computed by ZHETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D */ +/* > as determined by ZHETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ANORM */ +/* > \verbatim */ +/* > ANORM is DOUBLE PRECISION */ +/* > The 1-norm of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > The reciprocal of the condition number of the matrix A, */ +/* > computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ +/* > estimate of the 1-norm of inv(A) computed in this routine. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16HEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zhecon_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, doublereal *anorm, doublereal *rcond, + doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer kase, i__; + extern logical lsame_(char *, char *); + integer isave[3]; + logical upper; + extern /* Subroutine */ int zlacn2_(integer *, doublecomplex *, + doublecomplex *, doublereal *, integer *, integer *), xerbla_( + char *, integer *, ftnlen); + doublereal ainvnm; + extern /* Subroutine */ int zhetrs_(char *, integer *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*anorm < 0.) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHECON", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.; + if (*n == 0) { + *rcond = 1.; + return 0; + } else if (*anorm <= 0.) { + return 0; + } + +/* Check that the diagonal matrix D is nonsingular. */ + + if (upper) { + +/* Upper triangular storage: examine D from bottom to top */ + + for (i__ = *n; i__ >= 1; --i__) { + i__1 = i__ + i__ * a_dim1; + if (ipiv[i__] > 0 && (a[i__1].r == 0. && a[i__1].i == 0.)) { + return 0; + } +/* L10: */ + } + } else { + +/* Lower triangular storage: examine D from top to bottom. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + if (ipiv[i__] > 0 && (a[i__2].r == 0. && a[i__2].i == 0.)) { + return 0; + } +/* L20: */ + } + } + +/* Estimate the 1-norm of the inverse. */ + + kase = 0; +L30: + zlacn2_(n, &work[*n + 1], &work[1], &ainvnm, &kase, isave); + if (kase != 0) { + +/* Multiply by inv(L*D*L**H) or inv(U*D*U**H). */ + + zhetrs_(uplo, n, &c__1, &a[a_offset], lda, &ipiv[1], &work[1], n, + info); + goto L30; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / ainvnm / *anorm; + } + + return 0; + +/* End of ZHECON */ + +} /* zhecon_ */ + diff --git a/lapack-netlib/SRC/zhecon_3.c b/lapack-netlib/SRC/zhecon_3.c new file mode 100644 index 000000000..dbb80054c --- /dev/null +++ b/lapack-netlib/SRC/zhecon_3.c @@ -0,0 +1,675 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHECON_3 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHECON_3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHECON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, */ +/* WORK, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* DOUBLE PRECISION ANORM, RCOND */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ), E ( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > ZHECON_3 estimates the reciprocal of the condition number (in the */ +/* > 1-norm) of a complex Hermitian matrix A using the factorization */ +/* > computed by ZHETRF_RK or ZHETRF_BK: */ +/* > */ +/* > A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), */ +/* > */ +/* > where U (or L) is unit upper (or lower) triangular matrix, */ +/* > U**H (or L**H) is the conjugate of U (or L), P is a permutation */ +/* > matrix, P**T is the transpose of P, and D is Hermitian and block */ +/* > diagonal with 1-by-1 and 2-by-2 diagonal blocks. */ +/* > */ +/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ +/* > This routine uses BLAS3 solver ZHETRS_3. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the details of the factorization are */ +/* > stored as an upper or lower triangular matrix: */ +/* > = 'U': Upper triangular, form is A = P*U*D*(U**H)*(P**T); */ +/* > = 'L': Lower triangular, form is A = P*L*D*(L**H)*(P**T). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > Diagonal of the block diagonal matrix D and factors U or L */ +/* > as computed by ZHETRF_RK and ZHETRF_BK: */ +/* > a) ONLY diagonal elements of the Hermitian block diagonal */ +/* > matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); */ +/* > (superdiagonal (or subdiagonal) elements of D */ +/* > should be provided on entry in array E), and */ +/* > b) If UPLO = 'U': factor U in the superdiagonal part of A. */ +/* > If UPLO = 'L': factor L in the subdiagonal part of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] E */ +/* > \verbatim */ +/* > E is COMPLEX*16 array, dimension (N) */ +/* > On entry, contains the superdiagonal (or subdiagonal) */ +/* > elements of the Hermitian block diagonal matrix D */ +/* > with 1-by-1 or 2-by-2 diagonal blocks, where */ +/* > If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; */ +/* > If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. */ +/* > */ +/* > NOTE: For 1-by-1 diagonal block D(k), where */ +/* > 1 <= k <= N, the element E(k) is not referenced in both */ +/* > UPLO = 'U' or UPLO = 'L' cases. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D */ +/* > as determined by ZHETRF_RK or ZHETRF_BK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ANORM */ +/* > \verbatim */ +/* > ANORM is DOUBLE PRECISION */ +/* > The 1-norm of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > The reciprocal of the condition number of the matrix A, */ +/* > computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ +/* > estimate of the 1-norm of inv(A) computed in this routine. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup complex16HEcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > \verbatim */ +/* > */ +/* > June 2017, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > */ +/* > September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */ +/* > School of Mathematics, */ +/* > University of Manchester */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int zhecon_3_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublecomplex *e, integer *ipiv, doublereal *anorm, + doublereal *rcond, doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + extern /* Subroutine */ int zhetrs_3_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *); + integer kase, i__; + extern logical lsame_(char *, char *); + integer isave[3]; + logical upper; + extern /* Subroutine */ int zlacn2_(integer *, doublecomplex *, + doublecomplex *, doublereal *, integer *, integer *), xerbla_( + char *, integer *, ftnlen); + doublereal ainvnm; + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --e; + --ipiv; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*anorm < 0.) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHECON_3", &i__1, (ftnlen)8); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.; + if (*n == 0) { + *rcond = 1.; + return 0; + } else if (*anorm <= 0.) { + return 0; + } + +/* Check that the diagonal matrix D is nonsingular. */ + + if (upper) { + +/* Upper triangular storage: examine D from bottom to top */ + + for (i__ = *n; i__ >= 1; --i__) { + i__1 = i__ + i__ * a_dim1; + if (ipiv[i__] > 0 && (a[i__1].r == 0. && a[i__1].i == 0.)) { + return 0; + } + } + } else { + +/* Lower triangular storage: examine D from top to bottom. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + if (ipiv[i__] > 0 && (a[i__2].r == 0. && a[i__2].i == 0.)) { + return 0; + } + } + } + +/* Estimate the 1-norm of the inverse. */ + + kase = 0; +L30: + zlacn2_(n, &work[*n + 1], &work[1], &ainvnm, &kase, isave); + if (kase != 0) { + +/* Multiply by inv(L*D*L**H) or inv(U*D*U**H). */ + + zhetrs_3_(uplo, n, &c__1, &a[a_offset], lda, &e[1], &ipiv[1], &work[ + 1], n, info); + goto L30; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / ainvnm / *anorm; + } + + return 0; + +/* End of ZHECON_3 */ + +} /* zhecon_3__ */ + diff --git a/lapack-netlib/SRC/zhecon_rook.c b/lapack-netlib/SRC/zhecon_rook.c new file mode 100644 index 000000000..57d679518 --- /dev/null +++ b/lapack-netlib/SRC/zhecon_rook.c @@ -0,0 +1,650 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHECON_ROOK estimates the reciprocal of the condition number fort HE matrices using factorizat +ion obtained with one of the bounded diagonal pivoting methods (f2cmax 2 interchanges) */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHECON_ROOK + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHECON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, */ +/* INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* DOUBLE PRECISION ANORM, RCOND */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHECON_ROOK estimates the reciprocal of the condition number of a complex */ +/* > Hermitian matrix A using the factorization A = U*D*U**H or */ +/* > A = L*D*L**H computed by CHETRF_ROOK. */ +/* > */ +/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the details of the factorization are stored */ +/* > as an upper or lower triangular matrix. */ +/* > = 'U': Upper triangular, form is A = U*D*U**H; */ +/* > = 'L': Lower triangular, form is A = L*D*L**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > The block diagonal matrix D and the multipliers used to */ +/* > obtain the factor U or L as computed by CHETRF_ROOK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D */ +/* > as determined by CHETRF_ROOK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ANORM */ +/* > \verbatim */ +/* > ANORM is DOUBLE PRECISION */ +/* > The 1-norm of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > The reciprocal of the condition number of the matrix A, */ +/* > computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ +/* > estimate of the 1-norm of inv(A) computed in this routine. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup complex16HEcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > \verbatim */ +/* > */ +/* > June 2017, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > */ +/* > September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */ +/* > School of Mathematics, */ +/* > University of Manchester */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int zhecon_rook_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, doublereal *anorm, doublereal *rcond, + doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + extern /* Subroutine */ int zhetrs_rook_(char *, integer *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + integer *); + integer kase, i__; + extern logical lsame_(char *, char *); + integer isave[3]; + logical upper; + extern /* Subroutine */ int zlacn2_(integer *, doublecomplex *, + doublecomplex *, doublereal *, integer *, integer *), xerbla_( + char *, integer *, ftnlen); + doublereal ainvnm; + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*anorm < 0.) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHECON_ROOK", &i__1, (ftnlen)11); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.; + if (*n == 0) { + *rcond = 1.; + return 0; + } else if (*anorm <= 0.) { + return 0; + } + +/* Check that the diagonal matrix D is nonsingular. */ + + if (upper) { + +/* Upper triangular storage: examine D from bottom to top */ + + for (i__ = *n; i__ >= 1; --i__) { + i__1 = i__ + i__ * a_dim1; + if (ipiv[i__] > 0 && (a[i__1].r == 0. && a[i__1].i == 0.)) { + return 0; + } +/* L10: */ + } + } else { + +/* Lower triangular storage: examine D from top to bottom. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + if (ipiv[i__] > 0 && (a[i__2].r == 0. && a[i__2].i == 0.)) { + return 0; + } +/* L20: */ + } + } + +/* Estimate the 1-norm of the inverse. */ + + kase = 0; +L30: + zlacn2_(n, &work[*n + 1], &work[1], &ainvnm, &kase, isave); + if (kase != 0) { + +/* Multiply by inv(L*D*L**H) or inv(U*D*U**H). */ + + zhetrs_rook_(uplo, n, &c__1, &a[a_offset], lda, &ipiv[1], &work[1], + n, info); + goto L30; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / ainvnm / *anorm; + } + + return 0; + +/* End of ZHECON_ROOK */ + +} /* zhecon_rook__ */ + diff --git a/lapack-netlib/SRC/zheequb.c b/lapack-netlib/SRC/zheequb.c new file mode 100644 index 000000000..cb3c949d0 --- /dev/null +++ b/lapack-netlib/SRC/zheequb.c @@ -0,0 +1,874 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHEEQUB */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEEQUB + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) */ + +/* INTEGER INFO, LDA, N */ +/* DOUBLE PRECISION AMAX, SCOND */ +/* CHARACTER UPLO */ +/* COMPLEX*16 A( LDA, * ), WORK( * ) */ +/* DOUBLE PRECISION S( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEEQUB computes row and column scalings intended to equilibrate a */ +/* > Hermitian matrix A (with respect to the Euclidean norm) and reduce */ +/* > its condition number. The scale factors S are computed by the BIN */ +/* > algorithm (see references) so that the scaled matrix B with elements */ +/* > B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of */ +/* > the smallest possible condition number over all possible diagonal */ +/* > scalings. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > The N-by-N Hermitian matrix whose scaling factors are to be */ +/* > computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, S contains the scale factors for A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCOND */ +/* > \verbatim */ +/* > SCOND is DOUBLE PRECISION */ +/* > If INFO = 0, S contains the ratio of the smallest S(i) to */ +/* > the largest S(i). If SCOND >= 0.1 and AMAX is neither too */ +/* > large nor too small, it is not worth scaling by S. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] AMAX */ +/* > \verbatim */ +/* > AMAX is DOUBLE PRECISION */ +/* > Largest absolute value of any matrix element. If AMAX is */ +/* > very close to overflow or very close to underflow, the */ +/* > matrix should be scaled. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the i-th diagonal element is nonpositive. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup complex16HEcomputational */ + +/* > \par References: */ +/* ================ */ +/* > */ +/* > Livne, O.E. and Golub, G.H., "Scaling by Binormalization", \n */ +/* > Numerical Algorithms, vol. 35, no. 1, pp. 97-120, January 2004. \n */ +/* > DOI 10.1023/B:NUMA.0000016606.32820.69 \n */ +/* > Tech report version: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.3.1679 */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zheequb_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *s, doublereal *scond, doublereal *amax, + doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3, z__4; + + /* Local variables */ + doublereal base; + integer iter; + doublereal smin, smax, d__; + integer i__, j; + doublereal t, u, scale; + extern logical lsame_(char *, char *); + doublereal c0, c1, c2, sumsq; + extern doublereal dlamch_(char *); + doublereal si; + logical up; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum, smlnum; + extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, + doublereal *, doublereal *); + doublereal avg, std, tol; + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --s; + --work; + + /* Function Body */ + *info = 0; + if (! (lsame_(uplo, "U") || lsame_(uplo, "L"))) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEEQUB", &i__1, (ftnlen)7); + return 0; + } + up = lsame_(uplo, "U"); + *amax = 0.; + +/* Quick return if possible. */ + + if (*n == 0) { + *scond = 1.; + return 0; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + s[i__] = 0.; + } + *amax = 0.; + if (up) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + i__3 = i__ + j * a_dim1; + d__3 = s[i__], d__4 = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = + d_imag(&a[i__ + j * a_dim1]), abs(d__2)); + s[i__] = f2cmax(d__3,d__4); +/* Computing MAX */ + i__3 = i__ + j * a_dim1; + d__3 = s[j], d__4 = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = + d_imag(&a[i__ + j * a_dim1]), abs(d__2)); + s[j] = f2cmax(d__3,d__4); +/* Computing MAX */ + i__3 = i__ + j * a_dim1; + d__3 = *amax, d__4 = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = + d_imag(&a[i__ + j * a_dim1]), abs(d__2)); + *amax = f2cmax(d__3,d__4); + } +/* Computing MAX */ + i__2 = j + j * a_dim1; + d__3 = s[j], d__4 = (d__1 = a[i__2].r, abs(d__1)) + (d__2 = + d_imag(&a[j + j * a_dim1]), abs(d__2)); + s[j] = f2cmax(d__3,d__4); +/* Computing MAX */ + i__2 = j + j * a_dim1; + d__3 = *amax, d__4 = (d__1 = a[i__2].r, abs(d__1)) + (d__2 = + d_imag(&a[j + j * a_dim1]), abs(d__2)); + *amax = f2cmax(d__3,d__4); + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j + j * a_dim1; + d__3 = s[j], d__4 = (d__1 = a[i__2].r, abs(d__1)) + (d__2 = + d_imag(&a[j + j * a_dim1]), abs(d__2)); + s[j] = f2cmax(d__3,d__4); +/* Computing MAX */ + i__2 = j + j * a_dim1; + d__3 = *amax, d__4 = (d__1 = a[i__2].r, abs(d__1)) + (d__2 = + d_imag(&a[j + j * a_dim1]), abs(d__2)); + *amax = f2cmax(d__3,d__4); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + i__3 = i__ + j * a_dim1; + d__3 = s[i__], d__4 = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = + d_imag(&a[i__ + j * a_dim1]), abs(d__2)); + s[i__] = f2cmax(d__3,d__4); +/* Computing MAX */ + i__3 = i__ + j * a_dim1; + d__3 = s[j], d__4 = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = + d_imag(&a[i__ + j * a_dim1]), abs(d__2)); + s[j] = f2cmax(d__3,d__4); +/* Computing MAX */ + i__3 = i__ + j * a_dim1; + d__3 = *amax, d__4 = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = + d_imag(&a[i__ + j * a_dim1]), abs(d__2)); + *amax = f2cmax(d__3,d__4); + } + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + s[j] = 1. / s[j]; + } + tol = 1. / sqrt(*n * 2.); + for (iter = 1; iter <= 100; ++iter) { + scale = 0.; + sumsq = 0.; +/* beta = |A|s */ + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + work[i__2].r = 0., work[i__2].i = 0.; + } + if (up) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + d__3 = ((d__1 = a[i__5].r, abs(d__1)) + (d__2 = d_imag(&a[ + i__ + j * a_dim1]), abs(d__2))) * s[j]; + z__1.r = work[i__4].r + d__3, z__1.i = work[i__4].i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; + i__3 = j; + i__4 = j; + i__5 = i__ + j * a_dim1; + d__3 = ((d__1 = a[i__5].r, abs(d__1)) + (d__2 = d_imag(&a[ + i__ + j * a_dim1]), abs(d__2))) * s[i__]; + z__1.r = work[i__4].r + d__3, z__1.i = work[i__4].i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; + } + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + d__3 = ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[j + + j * a_dim1]), abs(d__2))) * s[j]; + z__1.r = work[i__3].r + d__3, z__1.i = work[i__3].i; + work[i__2].r = z__1.r, work[i__2].i = z__1.i; + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + d__3 = ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[j + + j * a_dim1]), abs(d__2))) * s[j]; + z__1.r = work[i__3].r + d__3, z__1.i = work[i__3].i; + work[i__2].r = z__1.r, work[i__2].i = z__1.i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + d__3 = ((d__1 = a[i__5].r, abs(d__1)) + (d__2 = d_imag(&a[ + i__ + j * a_dim1]), abs(d__2))) * s[j]; + z__1.r = work[i__4].r + d__3, z__1.i = work[i__4].i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; + i__3 = j; + i__4 = j; + i__5 = i__ + j * a_dim1; + d__3 = ((d__1 = a[i__5].r, abs(d__1)) + (d__2 = d_imag(&a[ + i__ + j * a_dim1]), abs(d__2))) * s[i__]; + z__1.r = work[i__4].r + d__3, z__1.i = work[i__4].i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; + } + } + } +/* avg = s^T beta / n */ + avg = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__2.r = s[i__2] * work[i__3].r, z__2.i = s[i__2] * work[i__3].i; + z__1.r = avg + z__2.r, z__1.i = z__2.i; + avg = z__1.r; + } + avg /= *n; + std = 0.; + i__1 = *n << 1; + for (i__ = *n + 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__ - *n; + i__4 = i__ - *n; + z__2.r = s[i__3] * work[i__4].r, z__2.i = s[i__3] * work[i__4].i; + z__1.r = z__2.r - avg, z__1.i = z__2.i; + work[i__2].r = z__1.r, work[i__2].i = z__1.i; + } + zlassq_(n, &work[*n + 1], &c__1, &scale, &sumsq); + std = scale * sqrt(sumsq / *n); + if (std < tol * avg) { + goto L999; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + t = (d__1 = a[i__2].r, abs(d__1)) + (d__2 = d_imag(&a[i__ + i__ * + a_dim1]), abs(d__2)); + si = s[i__]; + c2 = (*n - 1) * t; + i__2 = *n - 2; + i__3 = i__; + d__1 = t * si; + z__2.r = work[i__3].r - d__1, z__2.i = work[i__3].i; + d__2 = (doublereal) i__2; + z__1.r = d__2 * z__2.r, z__1.i = d__2 * z__2.i; + c1 = z__1.r; + d__1 = -(t * si) * si; + i__2 = i__; + d__2 = 2.; + z__4.r = d__2 * work[i__2].r, z__4.i = d__2 * work[i__2].i; + z__3.r = si * z__4.r, z__3.i = si * z__4.i; + z__2.r = d__1 + z__3.r, z__2.i = z__3.i; + d__3 = *n * avg; + z__1.r = z__2.r - d__3, z__1.i = z__2.i; + c0 = z__1.r; + d__ = c1 * c1 - c0 * 4 * c2; + if (d__ <= 0.) { + *info = -1; + return 0; + } + si = c0 * -2 / (c1 + sqrt(d__)); + d__ = si - s[i__]; + u = 0.; + if (up) { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + i__3 = j + i__ * a_dim1; + t = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[j + + i__ * a_dim1]), abs(d__2)); + u += s[j] * t; + i__3 = j; + i__4 = j; + d__1 = d__ * t; + z__1.r = work[i__4].r + d__1, z__1.i = work[i__4].i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + i__3 = i__ + j * a_dim1; + t = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[i__ + + j * a_dim1]), abs(d__2)); + u += s[j] * t; + i__3 = j; + i__4 = j; + d__1 = d__ * t; + z__1.r = work[i__4].r + d__1, z__1.i = work[i__4].i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; + } + } else { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + i__3 = i__ + j * a_dim1; + t = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[i__ + + j * a_dim1]), abs(d__2)); + u += s[j] * t; + i__3 = j; + i__4 = j; + d__1 = d__ * t; + z__1.r = work[i__4].r + d__1, z__1.i = work[i__4].i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + i__3 = j + i__ * a_dim1; + t = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[j + + i__ * a_dim1]), abs(d__2)); + u += s[j] * t; + i__3 = j; + i__4 = j; + d__1 = d__ * t; + z__1.r = work[i__4].r + d__1, z__1.i = work[i__4].i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; + } + } + i__2 = i__; + z__4.r = u + work[i__2].r, z__4.i = work[i__2].i; + z__3.r = d__ * z__4.r, z__3.i = d__ * z__4.i; + d__1 = (doublereal) (*n); + z__2.r = z__3.r / d__1, z__2.i = z__3.i / d__1; + z__1.r = avg + z__2.r, z__1.i = z__2.i; + avg = z__1.r; + s[i__] = si; + } + } +L999: + smlnum = dlamch_("SAFEMIN"); + bignum = 1. / smlnum; + smin = bignum; + smax = 0.; + t = 1. / sqrt(avg); + base = dlamch_("B"); + u = 1. / log(base); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = (integer) (u * log(s[i__] * t)); + s[i__] = pow_di(&base, &i__2); +/* Computing MIN */ + d__1 = smin, d__2 = s[i__]; + smin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = smax, d__2 = s[i__]; + smax = f2cmax(d__1,d__2); + } + *scond = f2cmax(smin,smlnum) / f2cmin(smax,bignum); + + return 0; +} /* zheequb_ */ + diff --git a/lapack-netlib/SRC/zheev.c b/lapack-netlib/SRC/zheev.c new file mode 100644 index 000000000..a7578e83b --- /dev/null +++ b/lapack-netlib/SRC/zheev.c @@ -0,0 +1,726 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHEEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matr +ices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEEV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, */ +/* INFO ) */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, LDA, LWORK, N */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEEV computes all eigenvalues and, optionally, eigenvectors of a */ +/* > complex Hermitian matrix A. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > orthonormal eigenvectors of the matrix A. */ +/* > If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */ +/* > or the upper triangle (if UPLO='U') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= f2cmax(1,2*N-1). */ +/* > For optimal efficiency, LWORK >= (NB+1)*N, */ +/* > where NB is the blocksize for ZHETRD returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (f2cmax(1, 3*N-2)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the algorithm failed to converge; i */ +/* > off-diagonal elements of an intermediate tridiagonal */ +/* > form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16HEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int zheev_(char *jobz, char *uplo, integer *n, doublecomplex + *a, integer *lda, doublereal *w, doublecomplex *work, integer *lwork, + doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer inde; + doublereal anrm; + integer imax; + doublereal rmin, rmax; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical lsame_(char *, char *); + integer iinfo; + logical lower, wantz; + integer nb; + extern doublereal dlamch_(char *); + integer iscale; + doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum; + extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, + integer *, doublereal *); + integer indtau; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *); + integer indwrk; + extern /* Subroutine */ int zhetrd_(char *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublecomplex *, + doublecomplex *, integer *, integer *); + integer llwork; + doublereal smlnum; + integer lwkopt; + logical lquery; + extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *), zungtr_(char *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, integer *); + doublereal eps; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --w; + --work; + --rwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1; + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); +/* Computing MAX */ + i__1 = 1, i__2 = (nb + 1) * *n; + lwkopt = f2cmax(i__1,i__2); + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + +/* Computing MAX */ + i__1 = 1, i__2 = (*n << 1) - 1; + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -8; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEEV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + work[1].r = 1., work[1].i = 0.; + if (wantz) { + i__1 = a_dim1 + 1; + a[i__1].r = 1., a[i__1].i = 0.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_("Safe minimum"); + eps = dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = zlanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + zlascl_(uplo, &c__0, &c__0, &c_b18, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */ + + inde = 1; + indtau = 1; + indwrk = indtau + *n; + llwork = *lwork - indwrk + 1; + zhetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, first call */ +/* ZUNGTR to generate the unitary matrix, then call ZSTEQR. */ + + if (! wantz) { + dsterf_(n, &w[1], &rwork[inde], info); + } else { + zungtr_(uplo, n, &a[a_offset], lda, &work[indtau], &work[indwrk], & + llwork, &iinfo); + indwrk = inde + *n; + zsteqr_(jobz, n, &w[1], &rwork[inde], &a[a_offset], lda, &rwork[ + indwrk], info); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* Set WORK(1) to optimal complex workspace size. */ + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + return 0; + +/* End of ZHEEV */ + +} /* zheev_ */ + diff --git a/lapack-netlib/SRC/zheev_2stage.c b/lapack-netlib/SRC/zheev_2stage.c new file mode 100644 index 000000000..dd0901364 --- /dev/null +++ b/lapack-netlib/SRC/zheev_2stage.c @@ -0,0 +1,785 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHEEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for +HE matrices */ + +/* @precisions fortran z -> s d c */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEEV_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, */ +/* RWORK, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, LDA, LWORK, N */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEEV_2STAGE computes all eigenvalues and, optionally, eigenvectors of a */ +/* > complex Hermitian matrix A using the 2stage technique for */ +/* > the reduction to tridiagonal. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > Not available in this release. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > orthonormal eigenvectors of the matrix A. */ +/* > If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */ +/* > or the upper triangle (if UPLO='U') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1, when N <= 1; */ +/* > otherwise */ +/* > If JOBZ = 'N' and N > 1, LWORK must be queried. */ +/* > LWORK = MAX(1, dimension) where */ +/* > dimension = f2cmax(stage1,stage2) + (KD+1)*N + N */ +/* > = N*KD + N*f2cmax(KD+1,FACTOPTNB) */ +/* > + f2cmax(2*KD*KD, KD*NTHREADS) */ +/* > + (KD+1)*N + N */ +/* > where KD is the blocking size of the reduction, */ +/* > FACTOPTNB is the blocking used by the QR or LQ */ +/* > algorithm, usually FACTOPTNB=128 is a good choice */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (f2cmax(1, 3*N-2)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the algorithm failed to converge; i */ +/* > off-diagonal elements of an intermediate tridiagonal */ +/* > form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complex16HEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > All details about the 2stage techniques are available in: */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int zheev_2stage_(char *jobz, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + doublereal d__1; + + /* Local variables */ + integer inde; + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + doublereal anrm; + integer imax; + doublereal rmin, rmax; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zhetrd_2stage_(char *, char *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *); + integer iinfo, lhtrd, lwmin; + logical lower; + integer lwtrd; + logical wantz; + integer ib, kd; + extern doublereal dlamch_(char *); + integer iscale; + doublereal safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum; + extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, + integer *, doublereal *); + integer indtau; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *); + integer indwrk, llwork; + doublereal smlnum; + logical lquery; + extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *), zungtr_(char *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, integer *); + doublereal eps; + integer indhous; + + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --w; + --work; + --rwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1; + + *info = 0; + if (! lsame_(jobz, "N")) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } + + if (*info == 0) { + kd = ilaenv2stage_(&c__1, "ZHETRD_2STAGE", jobz, n, &c_n1, &c_n1, & + c_n1); + ib = ilaenv2stage_(&c__2, "ZHETRD_2STAGE", jobz, n, &kd, &c_n1, &c_n1); + lhtrd = ilaenv2stage_(&c__3, "ZHETRD_2STAGE", jobz, n, &kd, &ib, & + c_n1); + lwtrd = ilaenv2stage_(&c__4, "ZHETRD_2STAGE", jobz, n, &kd, &ib, & + c_n1); + lwmin = *n + lhtrd + lwtrd; + work[1].r = (doublereal) lwmin, work[1].i = 0.; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEEV_2STAGE ", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + work[1].r = 1., work[1].i = 0.; + if (wantz) { + i__1 = a_dim1 + 1; + a[i__1].r = 1., a[i__1].i = 0.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_("Safe minimum"); + eps = dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = zlanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + zlascl_(uplo, &c__0, &c__0, &c_b28, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. */ + + inde = 1; + indtau = 1; + indhous = indtau + *n; + indwrk = indhous + lhtrd; + llwork = *lwork - indwrk + 1; + + zhetrd_2stage_(jobz, uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], & + work[indtau], &work[indhous], &lhtrd, &work[indwrk], &llwork, & + iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, first call */ +/* ZUNGTR to generate the unitary matrix, then call ZSTEQR. */ + + if (! wantz) { + dsterf_(n, &w[1], &rwork[inde], info); + } else { + zungtr_(uplo, n, &a[a_offset], lda, &work[indtau], &work[indwrk], & + llwork, &iinfo); + indwrk = inde + *n; + zsteqr_(jobz, n, &w[1], &rwork[inde], &a[a_offset], lda, &rwork[ + indwrk], info); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* Set WORK(1) to optimal complex workspace size. */ + + work[1].r = (doublereal) lwmin, work[1].i = 0.; + + return 0; + +/* End of ZHEEV_2STAGE */ + +} /* zheev_2stage__ */ + diff --git a/lapack-netlib/SRC/zheevd.c b/lapack-netlib/SRC/zheevd.c new file mode 100644 index 000000000..85a06acfa --- /dev/null +++ b/lapack-netlib/SRC/zheevd.c @@ -0,0 +1,831 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHEEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE mat +rices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEEVD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, */ +/* LRWORK, IWORK, LIWORK, INFO ) */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEEVD computes all eigenvalues and, optionally, eigenvectors of a */ +/* > complex Hermitian matrix A. If eigenvectors are desired, it uses a */ +/* > divide and conquer algorithm. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > orthonormal eigenvectors of the matrix A. */ +/* > If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */ +/* > or the upper triangle (if UPLO='U') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. */ +/* > If N <= 1, LWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LWORK must be at least N + 1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, */ +/* > dimension (LRWORK) */ +/* > On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The dimension of the array RWORK. */ +/* > If N <= 1, LRWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LRWORK must be at least N. */ +/* > If JOBZ = 'V' and N > 1, LRWORK must be at least */ +/* > 1 + 5*N + 2*N**2. */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > If N <= 1, LIWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LIWORK must be at least 1. */ +/* > If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i and JOBZ = 'N', then the algorithm failed */ +/* > to converge; i off-diagonal elements of an intermediate */ +/* > tridiagonal form did not converge to zero; */ +/* > if INFO = i and JOBZ = 'V', then the algorithm failed */ +/* > to compute an eigenvalue while working on the submatrix */ +/* > lying in rows and columns INFO/(N+1) through */ +/* > mod(INFO,N+1). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16HEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > Modified description of INFO. Sven, 16 Feb 05. */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Jeff Rutter, Computer Science Division, University of California */ +/* > at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zheevd_(char *jobz, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer inde; + doublereal anrm; + integer imax; + doublereal rmin, rmax; + integer lopt; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical lsame_(char *, char *); + integer iinfo, lwmin, liopt; + logical lower; + integer llrwk, lropt; + logical wantz; + integer indwk2, llwrk2; + extern doublereal dlamch_(char *); + integer iscale; + doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum; + extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, + integer *, doublereal *); + integer indtau; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *), zstedc_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublecomplex *, + integer *, doublereal *, integer *, integer *, integer *, integer + *); + integer indrwk, indwrk, liwmin; + extern /* Subroutine */ int zhetrd_(char *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublecomplex *, + doublecomplex *, integer *, integer *), zlacpy_(char *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *); + integer lrwmin, llwork; + doublereal smlnum; + logical lquery; + extern /* Subroutine */ int zunmtr_(char *, char *, char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + doublereal eps; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --w; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } + + if (*info == 0) { + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + lopt = lwmin; + lropt = lrwmin; + liopt = liwmin; + } else { + if (wantz) { + lwmin = (*n << 1) + *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { + lwmin = *n + 1; + lrwmin = *n; + liwmin = 1; + } +/* Computing MAX */ + i__1 = lwmin, i__2 = *n + ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, + &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lopt = f2cmax(i__1,i__2); + lropt = lrwmin; + liopt = liwmin; + } + work[1].r = (doublereal) lopt, work[1].i = 0.; + rwork[1] = (doublereal) lropt; + iwork[1] = liopt; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEEVD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + if (wantz) { + i__1 = a_dim1 + 1; + a[i__1].r = 1., a[i__1].i = 0.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_("Safe minimum"); + eps = dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = zlanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + zlascl_(uplo, &c__0, &c__0, &c_b18, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */ + + inde = 1; + indtau = 1; + indwrk = indtau + *n; + indrwk = inde + *n; + indwk2 = indwrk + *n * *n; + llwork = *lwork - indwrk + 1; + llwrk2 = *lwork - indwk2 + 1; + llrwk = *lrwork - indrwk + 1; + zhetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, first call */ +/* ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the */ +/* tridiagonal matrix, then call ZUNMTR to multiply it to the */ +/* Householder transformations represented as Householder vectors in */ +/* A. */ + + if (! wantz) { + dsterf_(n, &w[1], &rwork[inde], info); + } else { + zstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], + &llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info); + zunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo); + zlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + + work[1].r = (doublereal) lopt, work[1].i = 0.; + rwork[1] = (doublereal) lropt; + iwork[1] = liopt; + + return 0; + +/* End of ZHEEVD */ + +} /* zheevd_ */ + diff --git a/lapack-netlib/SRC/zheevd_2stage.c b/lapack-netlib/SRC/zheevd_2stage.c new file mode 100644 index 000000000..ecb86b947 --- /dev/null +++ b/lapack-netlib/SRC/zheevd_2stage.c @@ -0,0 +1,886 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for + HE matrices */ + +/* @precisions fortran z -> s d c */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEEVD_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, */ +/* RWORK, LRWORK, IWORK, LIWORK, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a */ +/* > complex Hermitian matrix A using the 2stage technique for */ +/* > the reduction to tridiagonal. If eigenvectors are desired, it uses a */ +/* > divide and conquer algorithm. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > Not available in this release. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > orthonormal eigenvectors of the matrix A. */ +/* > If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */ +/* > or the upper triangle (if UPLO='U') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If N <= 1, LWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LWORK must be queried. */ +/* > LWORK = MAX(1, dimension) where */ +/* > dimension = f2cmax(stage1,stage2) + (KD+1)*N + N+1 */ +/* > = N*KD + N*f2cmax(KD+1,FACTOPTNB) */ +/* > + f2cmax(2*KD*KD, KD*NTHREADS) */ +/* > + (KD+1)*N + N+1 */ +/* > where KD is the blocking size of the reduction, */ +/* > FACTOPTNB is the blocking used by the QR or LQ */ +/* > algorithm, usually FACTOPTNB=128 is a good choice */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2 */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, */ +/* > dimension (LRWORK) */ +/* > On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The dimension of the array RWORK. */ +/* > If N <= 1, LRWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LRWORK must be at least N. */ +/* > If JOBZ = 'V' and N > 1, LRWORK must be at least */ +/* > 1 + 5*N + 2*N**2. */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > If N <= 1, LIWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LIWORK must be at least 1. */ +/* > If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i and JOBZ = 'N', then the algorithm failed */ +/* > to converge; i off-diagonal elements of an intermediate */ +/* > tridiagonal form did not converge to zero; */ +/* > if INFO = i and JOBZ = 'V', then the algorithm failed */ +/* > to compute an eigenvalue while working on the submatrix */ +/* > lying in rows and columns INFO/(N+1) through */ +/* > mod(INFO,N+1). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complex16HEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > Modified description of INFO. Sven, 16 Feb 05. */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Jeff Rutter, Computer Science Division, University of California */ +/* > at Berkeley, USA */ +/* > */ +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > All details about the 2stage techniques are available in: */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int zheevd_2stage_(char *jobz, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + doublereal d__1; + + /* Local variables */ + integer inde; + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + doublereal anrm; + integer imax; + doublereal rmin, rmax; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zhetrd_2stage_(char *, char *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *); + integer iinfo, lhtrd, lwmin; + logical lower; + integer llrwk, lwtrd; + logical wantz; + integer indwk2, ib, llwrk2, kd; + extern doublereal dlamch_(char *); + integer iscale; + doublereal safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum; + extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, + integer *, doublereal *); + integer indtau; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *), zstedc_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublecomplex *, + integer *, doublereal *, integer *, integer *, integer *, integer + *); + integer indrwk, indwrk, liwmin; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + integer lrwmin, llwork; + doublereal smlnum; + logical lquery; + extern /* Subroutine */ int zunmtr_(char *, char *, char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + doublereal eps; + integer indhous; + + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --w; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + *info = 0; + if (! lsame_(jobz, "N")) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } + + if (*info == 0) { + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + } else { + kd = ilaenv2stage_(&c__1, "ZHETRD_2STAGE", jobz, n, &c_n1, &c_n1, + &c_n1); + ib = ilaenv2stage_(&c__2, "ZHETRD_2STAGE", jobz, n, &kd, &c_n1, & + c_n1); + lhtrd = ilaenv2stage_(&c__3, "ZHETRD_2STAGE", jobz, n, &kd, &ib, & + c_n1); + lwtrd = ilaenv2stage_(&c__4, "ZHETRD_2STAGE", jobz, n, &kd, &ib, & + c_n1); + if (wantz) { + lwmin = (*n << 1) + *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { + lwmin = *n + 1 + lhtrd + lwtrd; + lrwmin = *n; + liwmin = 1; + } + } + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEEVD_2STAGE", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + if (wantz) { + i__1 = a_dim1 + 1; + a[i__1].r = 1., a[i__1].i = 0.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_("Safe minimum"); + eps = dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = zlanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + zlascl_(uplo, &c__0, &c__0, &c_b28, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. */ + + inde = 1; + indrwk = inde + *n; + llrwk = *lrwork - indrwk + 1; + indtau = 1; + indhous = indtau + *n; + indwrk = indhous + lhtrd; + llwork = *lwork - indwrk + 1; + indwk2 = indwrk + *n * *n; + llwrk2 = *lwork - indwk2 + 1; + + zhetrd_2stage_(jobz, uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], & + work[indtau], &work[indhous], &lhtrd, &work[indwrk], &llwork, & + iinfo); + +/* For eigenvalues only, call DSTERF. For eigenvectors, first call */ +/* ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the */ +/* tridiagonal matrix, then call ZUNMTR to multiply it to the */ +/* Householder transformations represented as Householder vectors in */ +/* A. */ + + if (! wantz) { + dsterf_(n, &w[1], &rwork[inde], info); + } else { + zstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], + &llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info); + zunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo); + zlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + + return 0; + +/* End of ZHEEVD_2STAGE */ + +} /* zheevd_2stage__ */ + diff --git a/lapack-netlib/SRC/zheevr.c b/lapack-netlib/SRC/zheevr.c new file mode 100644 index 000000000..a38dfd629 --- /dev/null +++ b/lapack-netlib/SRC/zheevr.c @@ -0,0 +1,1191 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHEEVR computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE mat +rices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEEVR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, */ +/* ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, */ +/* RWORK, LRWORK, IWORK, LIWORK, INFO ) */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, */ +/* $ M, N */ +/* DOUBLE PRECISION ABSTOL, VL, VU */ +/* INTEGER ISUPPZ( * ), IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEEVR computes selected eigenvalues and, optionally, eigenvectors */ +/* > of a complex Hermitian matrix A. Eigenvalues and eigenvectors can */ +/* > be selected by specifying either a range of values or a range of */ +/* > indices for the desired eigenvalues. */ +/* > */ +/* > ZHEEVR first reduces the matrix A to tridiagonal form T with a call */ +/* > to ZHETRD. Then, whenever possible, ZHEEVR calls ZSTEMR to compute */ +/* > eigenspectrum using Relatively Robust Representations. ZSTEMR */ +/* > computes eigenvalues by the dqds algorithm, while orthogonal */ +/* > eigenvectors are computed from various "good" L D L^T representations */ +/* > (also known as Relatively Robust Representations). Gram-Schmidt */ +/* > orthogonalization is avoided as far as possible. More specifically, */ +/* > the various steps of the algorithm are as follows. */ +/* > */ +/* > For each unreduced block (submatrix) of T, */ +/* > (a) Compute T - sigma I = L D L^T, so that L and D */ +/* > define all the wanted eigenvalues to high relative accuracy. */ +/* > This means that small relative changes in the entries of D and L */ +/* > cause only small relative changes in the eigenvalues and */ +/* > eigenvectors. The standard (unfactored) representation of the */ +/* > tridiagonal matrix T does not have this property in general. */ +/* > (b) Compute the eigenvalues to suitable accuracy. */ +/* > If the eigenvectors are desired, the algorithm attains full */ +/* > accuracy of the computed eigenvalues only right before */ +/* > the corresponding vectors have to be computed, see steps c) and d). */ +/* > (c) For each cluster of close eigenvalues, select a new */ +/* > shift close to the cluster, find a new factorization, and refine */ +/* > the shifted eigenvalues to suitable accuracy. */ +/* > (d) For each eigenvalue with a large enough relative separation compute */ +/* > the corresponding eigenvector by forming a rank revealing twisted */ +/* > factorization. Go back to (c) for any clusters that remain. */ +/* > */ +/* > The desired accuracy of the output can be specified by the input */ +/* > parameter ABSTOL. */ +/* > */ +/* > For more details, see DSTEMR's documentation and: */ +/* > - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations */ +/* > to compute orthogonal eigenvectors of symmetric tridiagonal matrices," */ +/* > Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. */ +/* > - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and */ +/* > Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, */ +/* > 2004. Also LAPACK Working Note 154. */ +/* > - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric */ +/* > tridiagonal eigenvalue/eigenvector problem", */ +/* > Computer Science Division Technical Report No. UCB/CSD-97-971, */ +/* > UC Berkeley, May 1997. */ +/* > */ +/* > */ +/* > Note 1 : ZHEEVR calls ZSTEMR when the full spectrum is requested */ +/* > on machines which conform to the ieee-754 floating point standard. */ +/* > ZHEEVR calls DSTEBZ and ZSTEIN on non-ieee machines and */ +/* > when partial spectrum requests are made. */ +/* > */ +/* > Normal execution of ZSTEMR may create NaNs and infinities and */ +/* > hence may abort due to a floating point exception in environments */ +/* > which do not handle NaNs and infinities in the ieee standard default */ +/* > manner. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all eigenvalues will be found. */ +/* > = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* > will be found. */ +/* > = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* > For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and */ +/* > ZSTEIN are called */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, the lower triangle (if UPLO='L') or the upper */ +/* > triangle (if UPLO='U') of A, including the diagonal, is */ +/* > destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is DOUBLE PRECISION */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is DOUBLE PRECISION */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > smallest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > largest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ABSTOL */ +/* > \verbatim */ +/* > ABSTOL is DOUBLE PRECISION */ +/* > The absolute error tolerance for the eigenvalues. */ +/* > An approximate eigenvalue is accepted as converged */ +/* > when it is determined to lie in an interval [a,b] */ +/* > of width less than or equal to */ +/* > */ +/* > ABSTOL + EPS * f2cmax( |a|,|b| ) , */ +/* > */ +/* > where EPS is the machine precision. If ABSTOL is less than */ +/* > or equal to zero, then EPS*|T| will be used in its place, */ +/* > where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* > by reducing A to tridiagonal form. */ +/* > */ +/* > See "Computing Small Singular Values of Bidiagonal Matrices */ +/* > with Guaranteed High Relative Accuracy," by Demmel and */ +/* > Kahan, LAPACK Working Note #3. */ +/* > */ +/* > If high relative accuracy is important, set ABSTOL to */ +/* > DLAMCH( 'Safe minimum' ). Doing so will guarantee that */ +/* > eigenvalues are computed to high relative accuracy when */ +/* > possible in future releases. The current code does not */ +/* > make any guarantees about high relative accuracy, but */ +/* > future releases will. See J. Barlow and J. Demmel, */ +/* > "Computing Accurate Eigensystems of Scaled Diagonally */ +/* > Dominant Matrices", LAPACK Working Note #7, for a discussion */ +/* > of which matrices define their eigenvalues to high relative */ +/* > accuracy. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The total number of eigenvalues found. 0 <= M <= N. */ +/* > If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > The first M elements contain the selected eigenvalues in */ +/* > ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, f2cmax(1,M)) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* > contain the orthonormal eigenvectors of the matrix A */ +/* > corresponding to the selected eigenvalues, with the i-th */ +/* > column of Z holding the eigenvector associated with W(i). */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > Note: the user must ensure that at least f2cmax(1,M) columns are */ +/* > supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* > is not known in advance and an upper bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ISUPPZ */ +/* > \verbatim */ +/* > ISUPPZ is INTEGER array, dimension ( 2*f2cmax(1,M) ) */ +/* > The support of the eigenvectors in Z, i.e., the indices */ +/* > indicating the nonzero elements in Z. The i-th eigenvector */ +/* > is nonzero only in elements ISUPPZ( 2*i-1 ) through */ +/* > ISUPPZ( 2*i ). This is an output of ZSTEMR (tridiagonal */ +/* > matrix). The support of the eigenvectors of A is typically */ +/* > 1:N because of the unitary transformations applied by ZUNMTR. */ +/* > Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= f2cmax(1,2*N). */ +/* > For optimal efficiency, LWORK >= (NB+1)*N, */ +/* > where NB is the f2cmax of the blocksize for ZHETRD and for */ +/* > ZUNMTR as returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) */ +/* > On exit, if INFO = 0, RWORK(1) returns the optimal */ +/* > (and minimal) LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The length of the array RWORK. LRWORK >= f2cmax(1,24*N). */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal */ +/* > (and minimal) LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. LIWORK >= f2cmax(1,10*N). */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: Internal error */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16HEeigen */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Inderjit Dhillon, IBM Almaden, USA \n */ +/* > Osni Marques, LBNL/NERSC, USA \n */ +/* > Ken Stanley, Computer Science Division, University of */ +/* > California at Berkeley, USA \n */ +/* > Jason Riedy, Computer Science Division, University of */ +/* > California at Berkeley, USA \n */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zheevr_(char *jobz, char *range, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublereal *vl, doublereal *vu, + integer *il, integer *iu, doublereal *abstol, integer *m, doublereal * + w, doublecomplex *z__, integer *ldz, integer *isuppz, doublecomplex * + work, integer *lwork, doublereal *rwork, integer *lrwork, integer * + iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + doublereal anrm; + integer imax; + doublereal rmin, rmax; + logical test; + integer itmp1, i__, j; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + integer indrd, indre; + doublereal sigma; + extern logical lsame_(char *, char *); + integer iinfo; + char order[1]; + integer indwk; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer lwmin; + logical lower, wantz; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + integer nb, jj; + extern doublereal dlamch_(char *); + logical alleig, indeig; + integer iscale, ieeeok, indibl, indrdd, indifl, indree; + logical valeig; + doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + doublereal abstll, bignum; + integer indtau, indisp; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *); + integer indiwo, indwkn; + extern /* Subroutine */ int dstebz_(char *, char *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *); + integer indrwk, liwmin; + extern /* Subroutine */ int zhetrd_(char *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublecomplex *, + doublecomplex *, integer *, integer *); + logical tryrac; + integer lrwmin, llwrkn, llwork, nsplit; + doublereal smlnum; + extern /* Subroutine */ int zstein_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, doublecomplex *, + integer *, doublereal *, integer *, integer *, integer *); + logical lquery; + integer lwkopt; + extern doublereal zlansy_(char *, char *, integer *, doublecomplex *, + integer *, doublereal *); + extern /* Subroutine */ int zstemr_(char *, char *, integer *, doublereal + *, doublereal *, doublereal *, doublereal *, integer *, integer *, + integer *, doublereal *, doublecomplex *, integer *, integer *, + integer *, logical *, doublereal *, integer *, integer *, integer + *, integer *), zunmtr_(char *, char *, char *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ); + doublereal eps, vll, vuu; + integer llrwork; + doublereal tmp1; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --isuppz; + --work; + --rwork; + --iwork; + + /* Function Body */ + ieeeok = ilaenv_(&c__10, "ZHEEVR", "N", &c__1, &c__2, &c__3, &c__4, ( + ftnlen)6, (ftnlen)1); + + lower = lsame_(uplo, "L"); + wantz = lsame_(jobz, "V"); + alleig = lsame_(range, "A"); + valeig = lsame_(range, "V"); + indeig = lsame_(range, "I"); + + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + +/* Computing MAX */ + i__1 = 1, i__2 = *n * 24; + lrwmin = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = *n * 10; + liwmin = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + lwmin = f2cmax(i__1,i__2); + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -8; + } + } else if (indeig) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -9; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -10; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -15; + } + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__1, "ZUNMTR", uplo, n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nb = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = (nb + 1) * *n; + lwkopt = f2cmax(i__1,lwmin); + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -18; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -20; + } else if (*liwork < liwmin && ! lquery) { + *info = -22; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEEVR", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + if (*n == 1) { + work[1].r = 2., work[1].i = 0.; + if (alleig || indeig) { + *m = 1; + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + } else { + i__1 = a_dim1 + 1; + i__2 = a_dim1 + 1; + if (*vl < a[i__1].r && *vu >= a[i__2].r) { + *m = 1; + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + } + } + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + isuppz[1] = 1; + isuppz[2] = 1; + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_("Safe minimum"); + eps = dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); + rmax = f2cmin(d__1,d__2); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + abstll = *abstol; + if (valeig) { + vll = *vl; + vuu = *vu; + } + anrm = zlansy_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j + 1; + zdscal_(&i__2, &sigma, &a[j + j * a_dim1], &c__1); +/* L10: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + zdscal_(&j, &sigma, &a[j * a_dim1 + 1], &c__1); +/* L20: */ + } + } + if (*abstol > 0.) { + abstll = *abstol * sigma; + } + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } +/* Initialize indices into workspaces. Note: The IWORK indices are */ +/* used only if DSTERF or ZSTEMR fail. */ +/* WORK(INDTAU:INDTAU+N-1) stores the complex scalar factors of the */ +/* elementary reflectors used in ZHETRD. */ + indtau = 1; +/* INDWK is the starting offset of the remaining complex workspace, */ +/* and LLWORK is the remaining complex workspace size. */ + indwk = indtau + *n; + llwork = *lwork - indwk + 1; +/* RWORK(INDRD:INDRD+N-1) stores the real tridiagonal's diagonal */ +/* entries. */ + indrd = 1; +/* RWORK(INDRE:INDRE+N-1) stores the off-diagonal entries of the */ +/* tridiagonal matrix from ZHETRD. */ + indre = indrd + *n; +/* RWORK(INDRDD:INDRDD+N-1) is a copy of the diagonal entries over */ +/* -written by ZSTEMR (the DSTERF path copies the diagonal to W). */ + indrdd = indre + *n; +/* RWORK(INDREE:INDREE+N-1) is a copy of the off-diagonal entries over */ +/* -written while computing the eigenvalues in DSTERF and ZSTEMR. */ + indree = indrdd + *n; +/* INDRWK is the starting offset of the left-over real workspace, and */ +/* LLRWORK is the remaining workspace size. */ + indrwk = indree + *n; + llrwork = *lrwork - indrwk + 1; +/* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in DSTEBZ and */ +/* stores the block indices of each of the M<=N eigenvalues. */ + indibl = 1; +/* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in DSTEBZ and */ +/* stores the starting and finishing indices of each block. */ + indisp = indibl + *n; +/* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors */ +/* that corresponding to eigenvectors that fail to converge in */ +/* DSTEIN. This information is discarded; if any fail, the driver */ +/* returns INFO > 0. */ + indifl = indisp + *n; +/* INDIWO is the offset of the remaining integer workspace. */ + indiwo = indifl + *n; + +/* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */ + + zhetrd_(uplo, n, &a[a_offset], lda, &rwork[indrd], &rwork[indre], &work[ + indtau], &work[indwk], &llwork, &iinfo); + +/* If all eigenvalues are desired */ +/* then call DSTERF or ZSTEMR and ZUNMTR. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && ieeeok == 1) { + if (! wantz) { + dcopy_(n, &rwork[indrd], &c__1, &w[1], &c__1); + i__1 = *n - 1; + dcopy_(&i__1, &rwork[indre], &c__1, &rwork[indree], &c__1); + dsterf_(n, &w[1], &rwork[indree], info); + } else { + i__1 = *n - 1; + dcopy_(&i__1, &rwork[indre], &c__1, &rwork[indree], &c__1); + dcopy_(n, &rwork[indrd], &c__1, &rwork[indrdd], &c__1); + + if (*abstol <= *n * 2. * eps) { + tryrac = TRUE_; + } else { + tryrac = FALSE_; + } + zstemr_(jobz, "A", n, &rwork[indrdd], &rwork[indree], vl, vu, il, + iu, m, &w[1], &z__[z_offset], ldz, n, &isuppz[1], &tryrac, + &rwork[indrwk], &llrwork, &iwork[1], liwork, info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by ZSTEMR. */ + + if (wantz && *info == 0) { + indwkn = indwk; + llwrkn = *lwork - indwkn + 1; + zunmtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau] + , &z__[z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); + } + } + + + if (*info == 0) { + *m = *n; + goto L30; + } + *info = 0; + } + +/* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. */ +/* Also call DSTEBZ and ZSTEIN if ZSTEMR fails. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &rwork[indrd], & + rwork[indre], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], & + rwork[indrwk], &iwork[indiwo], info); + + if (wantz) { + zstein_(n, &rwork[indrd], &rwork[indre], m, &w[1], &iwork[indibl], & + iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[ + indiwo], &iwork[indifl], info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by ZSTEIN. */ + + indwkn = indwk; + llwrkn = *lwork - indwkn + 1; + zunmtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau], &z__[ + z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +L30: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L40: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + } +/* L50: */ + } + } + +/* Set WORK(1) to optimal workspace size. */ + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + + return 0; + +/* End of ZHEEVR */ + +} /* zheevr_ */ + diff --git a/lapack-netlib/SRC/zheevr_2stage.c b/lapack-netlib/SRC/zheevr_2stage.c new file mode 100644 index 000000000..ed3637b67 --- /dev/null +++ b/lapack-netlib/SRC/zheevr_2stage.c @@ -0,0 +1,1239 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHEEVR_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for + HE matrices */ + +/* @precisions fortran z -> s d c */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEEVR_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, */ +/* IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, */ +/* WORK, LWORK, RWORK, LRWORK, IWORK, */ +/* LIWORK, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, */ +/* $ M, N */ +/* DOUBLE PRECISION ABSTOL, VL, VU */ +/* INTEGER ISUPPZ( * ), IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEEVR_2STAGE computes selected eigenvalues and, optionally, eigenvectors */ +/* > of a complex Hermitian matrix A using the 2stage technique for */ +/* > the reduction to tridiagonal. Eigenvalues and eigenvectors can */ +/* > be selected by specifying either a range of values or a range of */ +/* > indices for the desired eigenvalues. */ +/* > */ +/* > ZHEEVR_2STAGE first reduces the matrix A to tridiagonal form T with a call */ +/* > to ZHETRD. Then, whenever possible, ZHEEVR_2STAGE calls ZSTEMR to compute */ +/* > eigenspectrum using Relatively Robust Representations. ZSTEMR */ +/* > computes eigenvalues by the dqds algorithm, while orthogonal */ +/* > eigenvectors are computed from various "good" L D L^T representations */ +/* > (also known as Relatively Robust Representations). Gram-Schmidt */ +/* > orthogonalization is avoided as far as possible. More specifically, */ +/* > the various steps of the algorithm are as follows. */ +/* > */ +/* > For each unreduced block (submatrix) of T, */ +/* > (a) Compute T - sigma I = L D L^T, so that L and D */ +/* > define all the wanted eigenvalues to high relative accuracy. */ +/* > This means that small relative changes in the entries of D and L */ +/* > cause only small relative changes in the eigenvalues and */ +/* > eigenvectors. The standard (unfactored) representation of the */ +/* > tridiagonal matrix T does not have this property in general. */ +/* > (b) Compute the eigenvalues to suitable accuracy. */ +/* > If the eigenvectors are desired, the algorithm attains full */ +/* > accuracy of the computed eigenvalues only right before */ +/* > the corresponding vectors have to be computed, see steps c) and d). */ +/* > (c) For each cluster of close eigenvalues, select a new */ +/* > shift close to the cluster, find a new factorization, and refine */ +/* > the shifted eigenvalues to suitable accuracy. */ +/* > (d) For each eigenvalue with a large enough relative separation compute */ +/* > the corresponding eigenvector by forming a rank revealing twisted */ +/* > factorization. Go back to (c) for any clusters that remain. */ +/* > */ +/* > The desired accuracy of the output can be specified by the input */ +/* > parameter ABSTOL. */ +/* > */ +/* > For more details, see DSTEMR's documentation and: */ +/* > - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations */ +/* > to compute orthogonal eigenvectors of symmetric tridiagonal matrices," */ +/* > Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. */ +/* > - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and */ +/* > Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, */ +/* > 2004. Also LAPACK Working Note 154. */ +/* > - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric */ +/* > tridiagonal eigenvalue/eigenvector problem", */ +/* > Computer Science Division Technical Report No. UCB/CSD-97-971, */ +/* > UC Berkeley, May 1997. */ +/* > */ +/* > */ +/* > Note 1 : ZHEEVR_2STAGE calls ZSTEMR when the full spectrum is requested */ +/* > on machines which conform to the ieee-754 floating point standard. */ +/* > ZHEEVR_2STAGE calls DSTEBZ and ZSTEIN on non-ieee machines and */ +/* > when partial spectrum requests are made. */ +/* > */ +/* > Normal execution of ZSTEMR may create NaNs and infinities and */ +/* > hence may abort due to a floating point exception in environments */ +/* > which do not handle NaNs and infinities in the ieee standard default */ +/* > manner. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > Not available in this release. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all eigenvalues will be found. */ +/* > = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* > will be found. */ +/* > = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* > For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and */ +/* > ZSTEIN are called */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, the lower triangle (if UPLO='L') or the upper */ +/* > triangle (if UPLO='U') of A, including the diagonal, is */ +/* > destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is DOUBLE PRECISION */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is DOUBLE PRECISION */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > smallest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > largest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ABSTOL */ +/* > \verbatim */ +/* > ABSTOL is DOUBLE PRECISION */ +/* > The absolute error tolerance for the eigenvalues. */ +/* > An approximate eigenvalue is accepted as converged */ +/* > when it is determined to lie in an interval [a,b] */ +/* > of width less than or equal to */ +/* > */ +/* > ABSTOL + EPS * f2cmax( |a|,|b| ) , */ +/* > */ +/* > where EPS is the machine precision. If ABSTOL is less than */ +/* > or equal to zero, then EPS*|T| will be used in its place, */ +/* > where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* > by reducing A to tridiagonal form. */ +/* > */ +/* > See "Computing Small Singular Values of Bidiagonal Matrices */ +/* > with Guaranteed High Relative Accuracy," by Demmel and */ +/* > Kahan, LAPACK Working Note #3. */ +/* > */ +/* > If high relative accuracy is important, set ABSTOL to */ +/* > DLAMCH( 'Safe minimum' ). Doing so will guarantee that */ +/* > eigenvalues are computed to high relative accuracy when */ +/* > possible in future releases. The current code does not */ +/* > make any guarantees about high relative accuracy, but */ +/* > future releases will. See J. Barlow and J. Demmel, */ +/* > "Computing Accurate Eigensystems of Scaled Diagonally */ +/* > Dominant Matrices", LAPACK Working Note #7, for a discussion */ +/* > of which matrices define their eigenvalues to high relative */ +/* > accuracy. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The total number of eigenvalues found. 0 <= M <= N. */ +/* > If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > The first M elements contain the selected eigenvalues in */ +/* > ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, f2cmax(1,M)) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* > contain the orthonormal eigenvectors of the matrix A */ +/* > corresponding to the selected eigenvalues, with the i-th */ +/* > column of Z holding the eigenvector associated with W(i). */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > Note: the user must ensure that at least f2cmax(1,M) columns are */ +/* > supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* > is not known in advance and an upper bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ISUPPZ */ +/* > \verbatim */ +/* > ISUPPZ is INTEGER array, dimension ( 2*f2cmax(1,M) ) */ +/* > The support of the eigenvectors in Z, i.e., the indices */ +/* > indicating the nonzero elements in Z. The i-th eigenvector */ +/* > is nonzero only in elements ISUPPZ( 2*i-1 ) through */ +/* > ISUPPZ( 2*i ). This is an output of ZSTEMR (tridiagonal */ +/* > matrix). The support of the eigenvectors of A is typically */ +/* > 1:N because of the unitary transformations applied by ZUNMTR. */ +/* > Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If JOBZ = 'N' and N > 1, LWORK must be queried. */ +/* > LWORK = MAX(1, 26*N, dimension) where */ +/* > dimension = f2cmax(stage1,stage2) + (KD+1)*N + N */ +/* > = N*KD + N*f2cmax(KD+1,FACTOPTNB) */ +/* > + f2cmax(2*KD*KD, KD*NTHREADS) */ +/* > + (KD+1)*N + N */ +/* > where KD is the blocking size of the reduction, */ +/* > FACTOPTNB is the blocking used by the QR or LQ */ +/* > algorithm, usually FACTOPTNB=128 is a good choice */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) */ +/* > On exit, if INFO = 0, RWORK(1) returns the optimal */ +/* > (and minimal) LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The length of the array RWORK. LRWORK >= f2cmax(1,24*N). */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal */ +/* > (and minimal) LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. LIWORK >= f2cmax(1,10*N). */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: Internal error */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16HEeigen */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Inderjit Dhillon, IBM Almaden, USA \n */ +/* > Osni Marques, LBNL/NERSC, USA \n */ +/* > Ken Stanley, Computer Science Division, University of */ +/* > California at Berkeley, USA \n */ +/* > Jason Riedy, Computer Science Division, University of */ +/* > California at Berkeley, USA \n */ +/* > */ +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > All details about the 2stage techniques are available in: */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int zheevr_2stage_(char *jobz, char *range, char *uplo, + integer *n, doublecomplex *a, integer *lda, doublereal *vl, + doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer + *m, doublereal *w, doublecomplex *z__, integer *ldz, integer *isuppz, + doublecomplex *work, integer *lwork, doublereal *rwork, integer * + lrwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + doublereal anrm; + integer imax; + doublereal rmin, rmax; + logical test; + integer itmp1, i__, j; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + integer indrd, indre; + doublereal sigma; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int zhetrd_2stage_(char *, char *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *); + char order[1]; + integer indwk, lhtrd; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer lwmin; + logical lower; + integer lwtrd; + logical wantz; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + integer ib, kd, jj; + extern doublereal dlamch_(char *); + logical alleig, indeig; + integer iscale, ieeeok, indibl, indrdd, indifl, indree; + logical valeig; + doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + doublereal abstll, bignum; + integer indtau, indisp; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *); + integer indiwo, indwkn; + extern /* Subroutine */ int dstebz_(char *, char *, integer *, doublereal + *, doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *); + integer indrwk, liwmin; + logical tryrac; + integer lrwmin, llwrkn, llwork, nsplit; + doublereal smlnum; + extern /* Subroutine */ int zstein_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, doublecomplex *, + integer *, doublereal *, integer *, integer *, integer *); + logical lquery; + extern doublereal zlansy_(char *, char *, integer *, doublecomplex *, + integer *, doublereal *); + extern /* Subroutine */ int zstemr_(char *, char *, integer *, doublereal + *, doublereal *, doublereal *, doublereal *, integer *, integer *, + integer *, doublereal *, doublecomplex *, integer *, integer *, + integer *, logical *, doublereal *, integer *, integer *, integer + *, integer *), zunmtr_(char *, char *, char *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ); + doublereal eps, vll, vuu; + integer indhous, llrwork; + doublereal tmp1; + + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --isuppz; + --work; + --rwork; + --iwork; + + /* Function Body */ + ieeeok = ilaenv_(&c__10, "ZHEEVR", "N", &c__1, &c__2, &c__3, &c__4, ( + ftnlen)6, (ftnlen)1); + + lower = lsame_(uplo, "L"); + wantz = lsame_(jobz, "V"); + alleig = lsame_(range, "A"); + valeig = lsame_(range, "V"); + indeig = lsame_(range, "I"); + + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + kd = ilaenv2stage_(&c__1, "ZHETRD_2STAGE", jobz, n, &c_n1, &c_n1, &c_n1); + ib = ilaenv2stage_(&c__2, "ZHETRD_2STAGE", jobz, n, &kd, &c_n1, &c_n1); + lhtrd = ilaenv2stage_(&c__3, "ZHETRD_2STAGE", jobz, n, &kd, &ib, &c_n1); + lwtrd = ilaenv2stage_(&c__4, "ZHETRD_2STAGE", jobz, n, &kd, &ib, &c_n1); + lwmin = *n + lhtrd + lwtrd; +/* Computing MAX */ + i__1 = 1, i__2 = *n * 24; + lrwmin = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = *n * 10; + liwmin = f2cmax(i__1,i__2); + + *info = 0; + if (! lsame_(jobz, "N")) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -8; + } + } else if (indeig) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -9; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -10; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -15; + } + } + + if (*info == 0) { + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -18; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -20; + } else if (*liwork < liwmin && ! lquery) { + *info = -22; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEEVR_2STAGE", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + if (*n == 1) { + work[1].r = 2., work[1].i = 0.; + if (alleig || indeig) { + *m = 1; + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + } else { + i__1 = a_dim1 + 1; + i__2 = a_dim1 + 1; + if (*vl < a[i__1].r && *vu >= a[i__2].r) { + *m = 1; + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + } + } + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + isuppz[1] = 1; + isuppz[2] = 1; + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_("Safe minimum"); + eps = dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); + rmax = f2cmin(d__1,d__2); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + abstll = *abstol; + if (valeig) { + vll = *vl; + vuu = *vu; + } + anrm = zlansy_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j + 1; + zdscal_(&i__2, &sigma, &a[j + j * a_dim1], &c__1); +/* L10: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + zdscal_(&j, &sigma, &a[j * a_dim1 + 1], &c__1); +/* L20: */ + } + } + if (*abstol > 0.) { + abstll = *abstol * sigma; + } + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } +/* Initialize indices into workspaces. Note: The IWORK indices are */ +/* used only if DSTERF or ZSTEMR fail. */ +/* WORK(INDTAU:INDTAU+N-1) stores the complex scalar factors of the */ +/* elementary reflectors used in ZHETRD. */ + indtau = 1; +/* INDWK is the starting offset of the remaining complex workspace, */ +/* and LLWORK is the remaining complex workspace size. */ + indhous = indtau + *n; + indwk = indhous + lhtrd; + llwork = *lwork - indwk + 1; +/* RWORK(INDRD:INDRD+N-1) stores the real tridiagonal's diagonal */ +/* entries. */ + indrd = 1; +/* RWORK(INDRE:INDRE+N-1) stores the off-diagonal entries of the */ +/* tridiagonal matrix from ZHETRD. */ + indre = indrd + *n; +/* RWORK(INDRDD:INDRDD+N-1) is a copy of the diagonal entries over */ +/* -written by ZSTEMR (the DSTERF path copies the diagonal to W). */ + indrdd = indre + *n; +/* RWORK(INDREE:INDREE+N-1) is a copy of the off-diagonal entries over */ +/* -written while computing the eigenvalues in DSTERF and ZSTEMR. */ + indree = indrdd + *n; +/* INDRWK is the starting offset of the left-over real workspace, and */ +/* LLRWORK is the remaining workspace size. */ + indrwk = indree + *n; + llrwork = *lrwork - indrwk + 1; +/* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in DSTEBZ and */ +/* stores the block indices of each of the M<=N eigenvalues. */ + indibl = 1; +/* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in DSTEBZ and */ +/* stores the starting and finishing indices of each block. */ + indisp = indibl + *n; +/* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors */ +/* that corresponding to eigenvectors that fail to converge in */ +/* ZSTEIN. This information is discarded; if any fail, the driver */ +/* returns INFO > 0. */ + indifl = indisp + *n; +/* INDIWO is the offset of the remaining integer workspace. */ + indiwo = indifl + *n; + +/* Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. */ + + zhetrd_2stage_(jobz, uplo, n, &a[a_offset], lda, &rwork[indrd], &rwork[ + indre], &work[indtau], &work[indhous], &lhtrd, &work[indwk], & + llwork, &iinfo); + +/* If all eigenvalues are desired */ +/* then call DSTERF or ZSTEMR and ZUNMTR. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && ieeeok == 1) { + if (! wantz) { + dcopy_(n, &rwork[indrd], &c__1, &w[1], &c__1); + i__1 = *n - 1; + dcopy_(&i__1, &rwork[indre], &c__1, &rwork[indree], &c__1); + dsterf_(n, &w[1], &rwork[indree], info); + } else { + i__1 = *n - 1; + dcopy_(&i__1, &rwork[indre], &c__1, &rwork[indree], &c__1); + dcopy_(n, &rwork[indrd], &c__1, &rwork[indrdd], &c__1); + + if (*abstol <= *n * 2. * eps) { + tryrac = TRUE_; + } else { + tryrac = FALSE_; + } + zstemr_(jobz, "A", n, &rwork[indrdd], &rwork[indree], vl, vu, il, + iu, m, &w[1], &z__[z_offset], ldz, n, &isuppz[1], &tryrac, + &rwork[indrwk], &llrwork, &iwork[1], liwork, info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by ZSTEMR. */ + + if (wantz && *info == 0) { + indwkn = indwk; + llwrkn = *lwork - indwkn + 1; + zunmtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau] + , &z__[z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); + } + } + + + if (*info == 0) { + *m = *n; + goto L30; + } + *info = 0; + } + +/* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. */ +/* Also call DSTEBZ and ZSTEIN if ZSTEMR fails. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &rwork[indrd], & + rwork[indre], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], & + rwork[indrwk], &iwork[indiwo], info); + + if (wantz) { + zstein_(n, &rwork[indrd], &rwork[indre], m, &w[1], &iwork[indibl], & + iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[ + indiwo], &iwork[indifl], info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by ZSTEIN. */ + + indwkn = indwk; + llwrkn = *lwork - indwkn + 1; + zunmtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau], &z__[ + z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +L30: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L40: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + } +/* L50: */ + } + } + +/* Set WORK(1) to optimal workspace size. */ + + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + + return 0; + +/* End of ZHEEVR_2STAGE */ + +} /* zheevr_2stage__ */ + diff --git a/lapack-netlib/SRC/zheevx.c b/lapack-netlib/SRC/zheevx.c new file mode 100644 index 000000000..b367427d8 --- /dev/null +++ b/lapack-netlib/SRC/zheevx.c @@ -0,0 +1,1028 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE mat +rices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEEVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, */ +/* ABSTOL, M, W, Z, LDZ, WORK, LWORK, RWORK, */ +/* IWORK, IFAIL, INFO ) */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N */ +/* DOUBLE PRECISION ABSTOL, VL, VU */ +/* INTEGER IFAIL( * ), IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEEVX computes selected eigenvalues and, optionally, eigenvectors */ +/* > of a complex Hermitian matrix A. Eigenvalues and eigenvectors can */ +/* > be selected by specifying either a range of values or a range of */ +/* > indices for the desired eigenvalues. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all eigenvalues will be found. */ +/* > = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* > will be found. */ +/* > = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, the lower triangle (if UPLO='L') or the upper */ +/* > triangle (if UPLO='U') of A, including the diagonal, is */ +/* > destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is DOUBLE PRECISION */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is DOUBLE PRECISION */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > smallest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > largest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ABSTOL */ +/* > \verbatim */ +/* > ABSTOL is DOUBLE PRECISION */ +/* > The absolute error tolerance for the eigenvalues. */ +/* > An approximate eigenvalue is accepted as converged */ +/* > when it is determined to lie in an interval [a,b] */ +/* > of width less than or equal to */ +/* > */ +/* > ABSTOL + EPS * f2cmax( |a|,|b| ) , */ +/* > */ +/* > where EPS is the machine precision. If ABSTOL is less than */ +/* > or equal to zero, then EPS*|T| will be used in its place, */ +/* > where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* > by reducing A to tridiagonal form. */ +/* > */ +/* > Eigenvalues will be computed most accurately when ABSTOL is */ +/* > set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ +/* > If this routine returns with INFO>0, indicating that some */ +/* > eigenvectors did not converge, try setting ABSTOL to */ +/* > 2*DLAMCH('S'). */ +/* > */ +/* > See "Computing Small Singular Values of Bidiagonal Matrices */ +/* > with Guaranteed High Relative Accuracy," by Demmel and */ +/* > Kahan, LAPACK Working Note #3. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The total number of eigenvalues found. 0 <= M <= N. */ +/* > If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > On normal exit, the first M elements contain the selected */ +/* > eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, f2cmax(1,M)) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* > contain the orthonormal eigenvectors of the matrix A */ +/* > corresponding to the selected eigenvalues, with the i-th */ +/* > column of Z holding the eigenvector associated with W(i). */ +/* > If an eigenvector fails to converge, then that column of Z */ +/* > contains the latest approximation to the eigenvector, and the */ +/* > index of the eigenvector is returned in IFAIL. */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > Note: the user must ensure that at least f2cmax(1,M) columns are */ +/* > supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* > is not known in advance and an upper bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1, when N <= 1; */ +/* > otherwise 2*N. */ +/* > For optimal efficiency, LWORK >= (NB+1)*N, */ +/* > where NB is the f2cmax of the blocksize for ZHETRD and for */ +/* > ZUNMTR as returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (7*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (5*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IFAIL */ +/* > \verbatim */ +/* > IFAIL is INTEGER array, dimension (N) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* > IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* > indices of the eigenvectors that failed to converge. */ +/* > If JOBZ = 'N', then IFAIL is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, then i eigenvectors failed to converge. */ +/* > Their indices are stored in array IFAIL. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16HEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int zheevx_(char *jobz, char *range, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublereal *vl, doublereal *vu, + integer *il, integer *iu, doublereal *abstol, integer *m, doublereal * + w, doublecomplex *z__, integer *ldz, doublecomplex *work, integer * + lwork, doublereal *rwork, integer *iwork, integer *ifail, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + integer indd, inde; + doublereal anrm; + integer imax; + doublereal rmin, rmax; + logical test; + integer itmp1, i__, j, indee; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical lsame_(char *, char *); + integer iinfo; + char order[1]; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + logical lower, wantz; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + integer nb, jj; + extern doublereal dlamch_(char *); + logical alleig, indeig; + integer iscale, indibl; + logical valeig; + doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + doublereal abstll, bignum; + extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, + integer *, doublereal *); + integer indiwk, indisp, indtau; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), dstebz_(char *, char *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *); + integer indrwk, indwrk; + extern /* Subroutine */ int zhetrd_(char *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublecomplex *, + doublecomplex *, integer *, integer *); + integer lwkmin; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + integer llwork, nsplit; + doublereal smlnum; + extern /* Subroutine */ int zstein_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, doublecomplex *, + integer *, doublereal *, integer *, integer *, integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *), zungtr_(char *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, integer *), + zunmtr_(char *, char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, integer *); + doublereal eps, vll, vuu, tmp1; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + --ifail; + + /* Function Body */ + lower = lsame_(uplo, "L"); + wantz = lsame_(jobz, "V"); + alleig = lsame_(range, "A"); + valeig = lsame_(range, "V"); + indeig = lsame_(range, "I"); + lquery = *lwork == -1; + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -8; + } + } else if (indeig) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -9; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -10; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -15; + } + } + + if (*info == 0) { + if (*n <= 1) { + lwkmin = 1; + work[1].r = (doublereal) lwkmin, work[1].i = 0.; + } else { + lwkmin = *n << 1; + nb = ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__1, "ZUNMTR", uplo, n, &c_n1, &c_n1, + &c_n1, (ftnlen)6, (ftnlen)1); + nb = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = (nb + 1) * *n; + lwkopt = f2cmax(i__1,i__2); + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*lwork < lwkmin && ! lquery) { + *info = -17; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEEVX", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (alleig || indeig) { + *m = 1; + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + } else if (valeig) { + i__1 = a_dim1 + 1; + i__2 = a_dim1 + 1; + if (*vl < a[i__1].r && *vu >= a[i__2].r) { + *m = 1; + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + } + } + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_("Safe minimum"); + eps = dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); + rmax = f2cmin(d__1,d__2); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + abstll = *abstol; + if (valeig) { + vll = *vl; + vuu = *vu; + } + anrm = zlanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j + 1; + zdscal_(&i__2, &sigma, &a[j + j * a_dim1], &c__1); +/* L10: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + zdscal_(&j, &sigma, &a[j * a_dim1 + 1], &c__1); +/* L20: */ + } + } + if (*abstol > 0.) { + abstll = *abstol * sigma; + } + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } + +/* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */ + + indd = 1; + inde = indd + *n; + indrwk = inde + *n; + indtau = 1; + indwrk = indtau + *n; + llwork = *lwork - indwrk + 1; + zhetrd_(uplo, n, &a[a_offset], lda, &rwork[indd], &rwork[inde], &work[ + indtau], &work[indwrk], &llwork, &iinfo); + +/* If all eigenvalues are desired and ABSTOL is less than or equal to */ +/* zero, then call DSTERF or ZUNGTR and ZSTEQR. If this fails for */ +/* some eigenvalue, then try DSTEBZ. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && *abstol <= 0.) { + dcopy_(n, &rwork[indd], &c__1, &w[1], &c__1); + indee = indrwk + (*n << 1); + if (! wantz) { + i__1 = *n - 1; + dcopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + dsterf_(n, &w[1], &rwork[indee], info); + } else { + zlacpy_("A", n, n, &a[a_offset], lda, &z__[z_offset], ldz); + zungtr_(uplo, n, &z__[z_offset], ldz, &work[indtau], &work[indwrk] + , &llwork, &iinfo); + i__1 = *n - 1; + dcopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + zsteqr_(jobz, n, &w[1], &rwork[indee], &z__[z_offset], ldz, & + rwork[indrwk], info); + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ifail[i__] = 0; +/* L30: */ + } + } + } + if (*info == 0) { + *m = *n; + goto L40; + } + *info = 0; + } + +/* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + indibl = 1; + indisp = indibl + *n; + indiwk = indisp + *n; + dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &rwork[indd], & + rwork[inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], & + rwork[indrwk], &iwork[indiwk], info); + + if (wantz) { + zstein_(n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], & + iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[ + indiwk], &ifail[1], info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by ZSTEIN. */ + + zunmtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau], &z__[ + z_offset], ldz, &work[indwrk], &llwork, &iinfo); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +L40: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L50: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + if (*info != 0) { + itmp1 = ifail[i__]; + ifail[i__] = ifail[j]; + ifail[j] = itmp1; + } + } +/* L60: */ + } + } + +/* Set WORK(1) to optimal complex workspace size. */ + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + return 0; + +/* End of ZHEEVX */ + +} /* zheevx_ */ + diff --git a/lapack-netlib/SRC/zheevx_2stage.c b/lapack-netlib/SRC/zheevx_2stage.c new file mode 100644 index 000000000..cb23c5965 --- /dev/null +++ b/lapack-netlib/SRC/zheevx_2stage.c @@ -0,0 +1,1086 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHEEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for + HE matrices */ + +/* @precisions fortran z -> s d c */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEEVX_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, */ +/* IL, IU, ABSTOL, M, W, Z, LDZ, WORK, */ +/* LWORK, RWORK, IWORK, IFAIL, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N */ +/* DOUBLE PRECISION ABSTOL, VL, VU */ +/* INTEGER IFAIL( * ), IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors */ +/* > of a complex Hermitian matrix A using the 2stage technique for */ +/* > the reduction to tridiagonal. Eigenvalues and eigenvectors can */ +/* > be selected by specifying either a range of values or a range of */ +/* > indices for the desired eigenvalues. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > Not available in this release. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all eigenvalues will be found. */ +/* > = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* > will be found. */ +/* > = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, the lower triangle (if UPLO='L') or the upper */ +/* > triangle (if UPLO='U') of A, including the diagonal, is */ +/* > destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is DOUBLE PRECISION */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is DOUBLE PRECISION */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > smallest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > largest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ABSTOL */ +/* > \verbatim */ +/* > ABSTOL is DOUBLE PRECISION */ +/* > The absolute error tolerance for the eigenvalues. */ +/* > An approximate eigenvalue is accepted as converged */ +/* > when it is determined to lie in an interval [a,b] */ +/* > of width less than or equal to */ +/* > */ +/* > ABSTOL + EPS * f2cmax( |a|,|b| ) , */ +/* > */ +/* > where EPS is the machine precision. If ABSTOL is less than */ +/* > or equal to zero, then EPS*|T| will be used in its place, */ +/* > where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* > by reducing A to tridiagonal form. */ +/* > */ +/* > Eigenvalues will be computed most accurately when ABSTOL is */ +/* > set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ +/* > If this routine returns with INFO>0, indicating that some */ +/* > eigenvectors did not converge, try setting ABSTOL to */ +/* > 2*DLAMCH('S'). */ +/* > */ +/* > See "Computing Small Singular Values of Bidiagonal Matrices */ +/* > with Guaranteed High Relative Accuracy," by Demmel and */ +/* > Kahan, LAPACK Working Note #3. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The total number of eigenvalues found. 0 <= M <= N. */ +/* > If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > On normal exit, the first M elements contain the selected */ +/* > eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, f2cmax(1,M)) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* > contain the orthonormal eigenvectors of the matrix A */ +/* > corresponding to the selected eigenvalues, with the i-th */ +/* > column of Z holding the eigenvector associated with W(i). */ +/* > If an eigenvector fails to converge, then that column of Z */ +/* > contains the latest approximation to the eigenvector, and the */ +/* > index of the eigenvector is returned in IFAIL. */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > Note: the user must ensure that at least f2cmax(1,M) columns are */ +/* > supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* > is not known in advance and an upper bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1, when N <= 1; */ +/* > otherwise */ +/* > If JOBZ = 'N' and N > 1, LWORK must be queried. */ +/* > LWORK = MAX(1, 8*N, dimension) where */ +/* > dimension = f2cmax(stage1,stage2) + (KD+1)*N + N */ +/* > = N*KD + N*f2cmax(KD+1,FACTOPTNB) */ +/* > + f2cmax(2*KD*KD, KD*NTHREADS) */ +/* > + (KD+1)*N + N */ +/* > where KD is the blocking size of the reduction, */ +/* > FACTOPTNB is the blocking used by the QR or LQ */ +/* > algorithm, usually FACTOPTNB=128 is a good choice */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (7*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (5*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IFAIL */ +/* > \verbatim */ +/* > IFAIL is INTEGER array, dimension (N) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* > IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* > indices of the eigenvectors that failed to converge. */ +/* > If JOBZ = 'N', then IFAIL is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, then i eigenvectors failed to converge. */ +/* > Their indices are stored in array IFAIL. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16HEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > All details about the 2stage techniques are available in: */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int zheevx_2stage_(char *jobz, char *range, char *uplo, + integer *n, doublecomplex *a, integer *lda, doublereal *vl, + doublereal *vu, integer *il, integer *iu, doublereal *abstol, integer + *m, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex * + work, integer *lwork, doublereal *rwork, integer *iwork, integer * + ifail, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + integer indd, inde; + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + doublereal anrm; + integer imax; + doublereal rmin, rmax; + logical test; + integer itmp1, i__, j, indee; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zhetrd_2stage_(char *, char *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *); + integer iinfo; + char order[1]; + integer lhtrd; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer lwmin; + logical lower; + integer lwtrd; + logical wantz; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + integer ib, kd, jj; + extern doublereal dlamch_(char *); + logical alleig, indeig; + integer iscale, indibl; + logical valeig; + doublereal safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + doublereal abstll, bignum; + extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, + integer *, doublereal *); + integer indiwk, indisp, indtau; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), dstebz_(char *, char *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *); + integer indrwk, indwrk; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + integer nsplit, llwork; + doublereal smlnum; + extern /* Subroutine */ int zstein_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, doublecomplex *, + integer *, doublereal *, integer *, integer *, integer *); + logical lquery; + extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *), zungtr_(char *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, integer *), + zunmtr_(char *, char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, integer *); + doublereal eps, vll, vuu; + integer indhous; + doublereal tmp1; + + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + --ifail; + + /* Function Body */ + lower = lsame_(uplo, "L"); + wantz = lsame_(jobz, "V"); + alleig = lsame_(range, "A"); + valeig = lsame_(range, "V"); + indeig = lsame_(range, "I"); + lquery = *lwork == -1; + + *info = 0; + if (! lsame_(jobz, "N")) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -8; + } + } else if (indeig) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -9; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -10; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -15; + } + } + + if (*info == 0) { + if (*n <= 1) { + lwmin = 1; + work[1].r = (doublereal) lwmin, work[1].i = 0.; + } else { + kd = ilaenv2stage_(&c__1, "ZHETRD_2STAGE", jobz, n, &c_n1, &c_n1, + &c_n1); + ib = ilaenv2stage_(&c__2, "ZHETRD_2STAGE", jobz, n, &kd, &c_n1, & + c_n1); + lhtrd = ilaenv2stage_(&c__3, "ZHETRD_2STAGE", jobz, n, &kd, &ib, & + c_n1); + lwtrd = ilaenv2stage_(&c__4, "ZHETRD_2STAGE", jobz, n, &kd, &ib, & + c_n1); + lwmin = *n + lhtrd + lwtrd; + work[1].r = (doublereal) lwmin, work[1].i = 0.; + } + + if (*lwork < lwmin && ! lquery) { + *info = -17; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEEVX_2STAGE", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (alleig || indeig) { + *m = 1; + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + } else if (valeig) { + i__1 = a_dim1 + 1; + i__2 = a_dim1 + 1; + if (*vl < a[i__1].r && *vu >= a[i__2].r) { + *m = 1; + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + } + } + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_("Safe minimum"); + eps = dlamch_("Precision"); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); + rmax = f2cmin(d__1,d__2); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + abstll = *abstol; + if (valeig) { + vll = *vl; + vuu = *vu; + } + anrm = zlanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j + 1; + zdscal_(&i__2, &sigma, &a[j + j * a_dim1], &c__1); +/* L10: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + zdscal_(&j, &sigma, &a[j * a_dim1 + 1], &c__1); +/* L20: */ + } + } + if (*abstol > 0.) { + abstll = *abstol * sigma; + } + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } + +/* Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. */ + + indd = 1; + inde = indd + *n; + indrwk = inde + *n; + indtau = 1; + indhous = indtau + *n; + indwrk = indhous + lhtrd; + llwork = *lwork - indwrk + 1; + + zhetrd_2stage_(jobz, uplo, n, &a[a_offset], lda, &rwork[indd], &rwork[ + inde], &work[indtau], &work[indhous], &lhtrd, &work[indwrk], & + llwork, &iinfo); + +/* If all eigenvalues are desired and ABSTOL is less than or equal to */ +/* zero, then call DSTERF or ZUNGTR and ZSTEQR. If this fails for */ +/* some eigenvalue, then try DSTEBZ. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && *abstol <= 0.) { + dcopy_(n, &rwork[indd], &c__1, &w[1], &c__1); + indee = indrwk + (*n << 1); + if (! wantz) { + i__1 = *n - 1; + dcopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + dsterf_(n, &w[1], &rwork[indee], info); + } else { + zlacpy_("A", n, n, &a[a_offset], lda, &z__[z_offset], ldz); + zungtr_(uplo, n, &z__[z_offset], ldz, &work[indtau], &work[indwrk] + , &llwork, &iinfo); + i__1 = *n - 1; + dcopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + zsteqr_(jobz, n, &w[1], &rwork[indee], &z__[z_offset], ldz, & + rwork[indrwk], info); + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ifail[i__] = 0; +/* L30: */ + } + } + } + if (*info == 0) { + *m = *n; + goto L40; + } + *info = 0; + } + +/* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + indibl = 1; + indisp = indibl + *n; + indiwk = indisp + *n; + dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &rwork[indd], & + rwork[inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], & + rwork[indrwk], &iwork[indiwk], info); + + if (wantz) { + zstein_(n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], & + iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[ + indiwk], &ifail[1], info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by ZSTEIN. */ + + zunmtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau], &z__[ + z_offset], ldz, &work[indwrk], &llwork, &iinfo); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +L40: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L50: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + if (*info != 0) { + itmp1 = ifail[i__]; + ifail[i__] = ifail[j]; + ifail[j] = itmp1; + } + } +/* L60: */ + } + } + +/* Set WORK(1) to optimal complex workspace size. */ + + work[1].r = (doublereal) lwmin, work[1].i = 0.; + + return 0; + +/* End of ZHEEVX_2STAGE */ + +} /* zheevx_2stage__ */ + diff --git a/lapack-netlib/SRC/zhegs2.c b/lapack-netlib/SRC/zhegs2.c new file mode 100644 index 000000000..a26bc4e1e --- /dev/null +++ b/lapack-netlib/SRC/zhegs2.c @@ -0,0 +1,770 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHEGS2 reduces a Hermitian definite generalized eigenproblem to standard form, using the factor +ization results obtained from cpotrf (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEGS2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, ITYPE, LDA, LDB, N */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEGS2 reduces a complex Hermitian-definite generalized */ +/* > eigenproblem to standard form. */ +/* > */ +/* > If ITYPE = 1, the problem is A*x = lambda*B*x, */ +/* > and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) */ +/* > */ +/* > If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */ +/* > B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H *A*L. */ +/* > */ +/* > B must have been previously factorized as U**H *U or L*L**H by ZPOTRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITYPE */ +/* > \verbatim */ +/* > ITYPE is INTEGER */ +/* > = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); */ +/* > = 2 or 3: compute U*A*U**H or L**H *A*L. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > Hermitian matrix A is stored, and how B has been factorized. */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > n by n upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading n by n lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if INFO = 0, the transformed matrix, stored in the */ +/* > same format as A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,N) */ +/* > The triangular factor from the Cholesky factorization of B, */ +/* > as returned by ZPOTRF. */ +/* > B is modified by the routine but restored on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16HEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zhegs2_(integer *itype, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + doublereal d__1, d__2; + doublecomplex z__1; + + /* Local variables */ + extern /* Subroutine */ int zher2_(char *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + integer k; + extern logical lsame_(char *, char *); + logical upper; + extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), ztrmv_( + char *, char *, char *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), ztrsv_(char * + , char *, char *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + doublecomplex ct; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zdscal_( + integer *, doublereal *, doublecomplex *, integer *), zlacgv_( + integer *, doublecomplex *, integer *); + doublereal akk, bkk; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEGS2", &i__1, (ftnlen)6); + return 0; + } + + if (*itype == 1) { + if (upper) { + +/* Compute inv(U**H)*A*inv(U) */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Update the upper triangle of A(k:n,k:n) */ + + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; +/* Computing 2nd power */ + d__1 = bkk; + akk /= d__1 * d__1; + i__2 = k + k * a_dim1; + a[i__2].r = akk, a[i__2].i = 0.; + if (k < *n) { + i__2 = *n - k; + d__1 = 1. / bkk; + zdscal_(&i__2, &d__1, &a[k + (k + 1) * a_dim1], lda); + d__1 = akk * -.5; + ct.r = d__1, ct.i = 0.; + i__2 = *n - k; + zlacgv_(&i__2, &a[k + (k + 1) * a_dim1], lda); + i__2 = *n - k; + zlacgv_(&i__2, &b[k + (k + 1) * b_dim1], ldb); + i__2 = *n - k; + zaxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + ( + k + 1) * a_dim1], lda); + i__2 = *n - k; + z__1.r = -1., z__1.i = 0.; + zher2_(uplo, &i__2, &z__1, &a[k + (k + 1) * a_dim1], lda, + &b[k + (k + 1) * b_dim1], ldb, &a[k + 1 + (k + 1) + * a_dim1], lda); + i__2 = *n - k; + zaxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + ( + k + 1) * a_dim1], lda); + i__2 = *n - k; + zlacgv_(&i__2, &b[k + (k + 1) * b_dim1], ldb); + i__2 = *n - k; + ztrsv_(uplo, "Conjugate transpose", "Non-unit", &i__2, &b[ + k + 1 + (k + 1) * b_dim1], ldb, &a[k + (k + 1) * + a_dim1], lda); + i__2 = *n - k; + zlacgv_(&i__2, &a[k + (k + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute inv(L)*A*inv(L**H) */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Update the lower triangle of A(k:n,k:n) */ + + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; +/* Computing 2nd power */ + d__1 = bkk; + akk /= d__1 * d__1; + i__2 = k + k * a_dim1; + a[i__2].r = akk, a[i__2].i = 0.; + if (k < *n) { + i__2 = *n - k; + d__1 = 1. / bkk; + zdscal_(&i__2, &d__1, &a[k + 1 + k * a_dim1], &c__1); + d__1 = akk * -.5; + ct.r = d__1, ct.i = 0.; + i__2 = *n - k; + zaxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + + 1 + k * a_dim1], &c__1); + i__2 = *n - k; + z__1.r = -1., z__1.i = 0.; + zher2_(uplo, &i__2, &z__1, &a[k + 1 + k * a_dim1], &c__1, + &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + (k + 1) + * a_dim1], lda); + i__2 = *n - k; + zaxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + + 1 + k * a_dim1], &c__1); + i__2 = *n - k; + ztrsv_(uplo, "No transpose", "Non-unit", &i__2, &b[k + 1 + + (k + 1) * b_dim1], ldb, &a[k + 1 + k * a_dim1], + &c__1); + } +/* L20: */ + } + } + } else { + if (upper) { + +/* Compute U*A*U**H */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Update the upper triangle of A(1:k,1:k) */ + + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; + i__2 = k - 1; + ztrmv_(uplo, "No transpose", "Non-unit", &i__2, &b[b_offset], + ldb, &a[k * a_dim1 + 1], &c__1); + d__1 = akk * .5; + ct.r = d__1, ct.i = 0.; + i__2 = k - 1; + zaxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + + 1], &c__1); + i__2 = k - 1; + zher2_(uplo, &i__2, &c_b1, &a[k * a_dim1 + 1], &c__1, &b[k * + b_dim1 + 1], &c__1, &a[a_offset], lda); + i__2 = k - 1; + zaxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + + 1], &c__1); + i__2 = k - 1; + zdscal_(&i__2, &bkk, &a[k * a_dim1 + 1], &c__1); + i__2 = k + k * a_dim1; +/* Computing 2nd power */ + d__2 = bkk; + d__1 = akk * (d__2 * d__2); + a[i__2].r = d__1, a[i__2].i = 0.; +/* L30: */ + } + } else { + +/* Compute L**H *A*L */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Update the lower triangle of A(1:k,1:k) */ + + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; + i__2 = k - 1; + zlacgv_(&i__2, &a[k + a_dim1], lda); + i__2 = k - 1; + ztrmv_(uplo, "Conjugate transpose", "Non-unit", &i__2, &b[ + b_offset], ldb, &a[k + a_dim1], lda); + d__1 = akk * .5; + ct.r = d__1, ct.i = 0.; + i__2 = k - 1; + zlacgv_(&i__2, &b[k + b_dim1], ldb); + i__2 = k - 1; + zaxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda); + i__2 = k - 1; + zher2_(uplo, &i__2, &c_b1, &a[k + a_dim1], lda, &b[k + b_dim1] + , ldb, &a[a_offset], lda); + i__2 = k - 1; + zaxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda); + i__2 = k - 1; + zlacgv_(&i__2, &b[k + b_dim1], ldb); + i__2 = k - 1; + zdscal_(&i__2, &bkk, &a[k + a_dim1], lda); + i__2 = k - 1; + zlacgv_(&i__2, &a[k + a_dim1], lda); + i__2 = k + k * a_dim1; +/* Computing 2nd power */ + d__2 = bkk; + d__1 = akk * (d__2 * d__2); + a[i__2].r = d__1, a[i__2].i = 0.; +/* L40: */ + } + } + } + return 0; + +/* End of ZHEGS2 */ + +} /* zhegs2_ */ + diff --git a/lapack-netlib/SRC/zhegst.c b/lapack-netlib/SRC/zhegst.c new file mode 100644 index 000000000..224643d44 --- /dev/null +++ b/lapack-netlib/SRC/zhegst.c @@ -0,0 +1,787 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHEGST */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEGST + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, ITYPE, LDA, LDB, N */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEGST reduces a complex Hermitian-definite generalized */ +/* > eigenproblem to standard form. */ +/* > */ +/* > If ITYPE = 1, the problem is A*x = lambda*B*x, */ +/* > and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) */ +/* > */ +/* > If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */ +/* > B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. */ +/* > */ +/* > B must have been previously factorized as U**H*U or L*L**H by ZPOTRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITYPE */ +/* > \verbatim */ +/* > ITYPE is INTEGER */ +/* > = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); */ +/* > = 2 or 3: compute U*A*U**H or L**H*A*L. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored and B is factored as */ +/* > U**H*U; */ +/* > = 'L': Lower triangle of A is stored and B is factored as */ +/* > L*L**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if INFO = 0, the transformed matrix, stored in the */ +/* > same format as A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,N) */ +/* > The triangular factor from the Cholesky factorization of B, */ +/* > as returned by ZPOTRF. */ +/* > B is modified by the routine but restored on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16HEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zhegst_(integer *itype, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + integer k; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zhemm_(char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + logical upper; + extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + ztrsm_(char *, char *, char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *), zhegs2_(integer *, + char *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *), zher2k_(char *, char *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *, + integer *); + integer kb, nb; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEGST", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "ZHEGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + zhegs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); + } else { + +/* Use blocked code */ + + if (*itype == 1) { + if (upper) { + +/* Compute inv(U**H)*A*inv(U) */ + + i__1 = *n; + i__2 = nb; + for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { +/* Computing MIN */ + i__3 = *n - k + 1; + kb = f2cmin(i__3,nb); + +/* Update the upper triangle of A(k:n,k:n) */ + + zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + + k * b_dim1], ldb, info); + if (k + kb <= *n) { + i__3 = *n - k - kb + 1; + ztrsm_("Left", uplo, "Conjugate transpose", "Non-unit" + , &kb, &i__3, &c_b1, &b[k + k * b_dim1], ldb, + &a[k + (k + kb) * a_dim1], lda); + i__3 = *n - k - kb + 1; + z__1.r = -.5, z__1.i = 0.; + zhemm_("Left", uplo, &kb, &i__3, &z__1, &a[k + k * + a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, + &c_b1, &a[k + (k + kb) * a_dim1], lda); + i__3 = *n - k - kb + 1; + z__1.r = -1., z__1.i = 0.; + zher2k_(uplo, "Conjugate transpose", &i__3, &kb, & + z__1, &a[k + (k + kb) * a_dim1], lda, &b[k + ( + k + kb) * b_dim1], ldb, &c_b18, &a[k + kb + ( + k + kb) * a_dim1], lda) + ; + i__3 = *n - k - kb + 1; + z__1.r = -.5, z__1.i = 0.; + zhemm_("Left", uplo, &kb, &i__3, &z__1, &a[k + k * + a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, + &c_b1, &a[k + (k + kb) * a_dim1], lda); + i__3 = *n - k - kb + 1; + ztrsm_("Right", uplo, "No transpose", "Non-unit", &kb, + &i__3, &c_b1, &b[k + kb + (k + kb) * b_dim1], + ldb, &a[k + (k + kb) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute inv(L)*A*inv(L**H) */ + + i__2 = *n; + i__1 = nb; + for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { +/* Computing MIN */ + i__3 = *n - k + 1; + kb = f2cmin(i__3,nb); + +/* Update the lower triangle of A(k:n,k:n) */ + + zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + + k * b_dim1], ldb, info); + if (k + kb <= *n) { + i__3 = *n - k - kb + 1; + ztrsm_("Right", uplo, "Conjugate transpose", "Non-un" + "it", &i__3, &kb, &c_b1, &b[k + k * b_dim1], + ldb, &a[k + kb + k * a_dim1], lda); + i__3 = *n - k - kb + 1; + z__1.r = -.5, z__1.i = 0.; + zhemm_("Right", uplo, &i__3, &kb, &z__1, &a[k + k * + a_dim1], lda, &b[k + kb + k * b_dim1], ldb, & + c_b1, &a[k + kb + k * a_dim1], lda); + i__3 = *n - k - kb + 1; + z__1.r = -1., z__1.i = 0.; + zher2k_(uplo, "No transpose", &i__3, &kb, &z__1, &a[k + + kb + k * a_dim1], lda, &b[k + kb + k * + b_dim1], ldb, &c_b18, &a[k + kb + (k + kb) * + a_dim1], lda); + i__3 = *n - k - kb + 1; + z__1.r = -.5, z__1.i = 0.; + zhemm_("Right", uplo, &i__3, &kb, &z__1, &a[k + k * + a_dim1], lda, &b[k + kb + k * b_dim1], ldb, & + c_b1, &a[k + kb + k * a_dim1], lda); + i__3 = *n - k - kb + 1; + ztrsm_("Left", uplo, "No transpose", "Non-unit", & + i__3, &kb, &c_b1, &b[k + kb + (k + kb) * + b_dim1], ldb, &a[k + kb + k * a_dim1], lda); + } +/* L20: */ + } + } + } else { + if (upper) { + +/* Compute U*A*U**H */ + + i__1 = *n; + i__2 = nb; + for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { +/* Computing MIN */ + i__3 = *n - k + 1; + kb = f2cmin(i__3,nb); + +/* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */ + + i__3 = k - 1; + ztrmm_("Left", uplo, "No transpose", "Non-unit", &i__3, & + kb, &c_b1, &b[b_offset], ldb, &a[k * a_dim1 + 1], + lda); + i__3 = k - 1; + zhemm_("Right", uplo, &i__3, &kb, &c_b2, &a[k + k * + a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b1, &a[ + k * a_dim1 + 1], lda); + i__3 = k - 1; + zher2k_(uplo, "No transpose", &i__3, &kb, &c_b1, &a[k * + a_dim1 + 1], lda, &b[k * b_dim1 + 1], ldb, &c_b18, + &a[a_offset], lda); + i__3 = k - 1; + zhemm_("Right", uplo, &i__3, &kb, &c_b2, &a[k + k * + a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b1, &a[ + k * a_dim1 + 1], lda); + i__3 = k - 1; + ztrmm_("Right", uplo, "Conjugate transpose", "Non-unit", & + i__3, &kb, &c_b1, &b[k + k * b_dim1], ldb, &a[k * + a_dim1 + 1], lda); + zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + + k * b_dim1], ldb, info); +/* L30: */ + } + } else { + +/* Compute L**H*A*L */ + + i__2 = *n; + i__1 = nb; + for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { +/* Computing MIN */ + i__3 = *n - k + 1; + kb = f2cmin(i__3,nb); + +/* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */ + + i__3 = k - 1; + ztrmm_("Right", uplo, "No transpose", "Non-unit", &kb, & + i__3, &c_b1, &b[b_offset], ldb, &a[k + a_dim1], + lda); + i__3 = k - 1; + zhemm_("Left", uplo, &kb, &i__3, &c_b2, &a[k + k * a_dim1] + , lda, &b[k + b_dim1], ldb, &c_b1, &a[k + a_dim1], + lda); + i__3 = k - 1; + zher2k_(uplo, "Conjugate transpose", &i__3, &kb, &c_b1, & + a[k + a_dim1], lda, &b[k + b_dim1], ldb, &c_b18, & + a[a_offset], lda); + i__3 = k - 1; + zhemm_("Left", uplo, &kb, &i__3, &c_b2, &a[k + k * a_dim1] + , lda, &b[k + b_dim1], ldb, &c_b1, &a[k + a_dim1], + lda); + i__3 = k - 1; + ztrmm_("Left", uplo, "Conjugate transpose", "Non-unit", & + kb, &i__3, &c_b1, &b[k + k * b_dim1], ldb, &a[k + + a_dim1], lda); + zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + + k * b_dim1], ldb, info); +/* L40: */ + } + } + } + } + return 0; + +/* End of ZHEGST */ + +} /* zhegst_ */ + diff --git a/lapack-netlib/SRC/zhegv.c b/lapack-netlib/SRC/zhegv.c new file mode 100644 index 000000000..68890dd86 --- /dev/null +++ b/lapack-netlib/SRC/zhegv.c @@ -0,0 +1,738 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHEGV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEGV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, */ +/* LWORK, RWORK, INFO ) */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEGV computes all the eigenvalues, and optionally, the eigenvectors */ +/* > of a complex generalized Hermitian-definite eigenproblem, of the form */ +/* > A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. */ +/* > Here A and B are assumed to be Hermitian and B is also */ +/* > positive definite. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITYPE */ +/* > \verbatim */ +/* > ITYPE is INTEGER */ +/* > Specifies the problem type to be solved: */ +/* > = 1: A*x = (lambda)*B*x */ +/* > = 2: A*B*x = (lambda)*x */ +/* > = 3: B*A*x = (lambda)*x */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangles of A and B are stored; */ +/* > = 'L': Lower triangles of A and B are stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > matrix Z of eigenvectors. The eigenvectors are normalized */ +/* > as follows: */ +/* > if ITYPE = 1 or 2, Z**H*B*Z = I; */ +/* > if ITYPE = 3, Z**H*inv(B)*Z = I. */ +/* > If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */ +/* > or the lower triangle (if UPLO='L') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB, N) */ +/* > On entry, the Hermitian positive definite matrix B. */ +/* > If UPLO = 'U', the leading N-by-N upper triangular part of B */ +/* > contains the upper triangular part of the matrix B. */ +/* > If UPLO = 'L', the leading N-by-N lower triangular part of B */ +/* > contains the lower triangular part of the matrix B. */ +/* > */ +/* > On exit, if INFO <= N, the part of B containing the matrix is */ +/* > overwritten by the triangular factor U or L from the Cholesky */ +/* > factorization B = U**H*U or B = L*L**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= f2cmax(1,2*N-1). */ +/* > For optimal efficiency, LWORK >= (NB+1)*N, */ +/* > where NB is the blocksize for ZHETRD returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (f2cmax(1, 3*N-2)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: ZPOTRF or ZHEEV returned an error code: */ +/* > <= N: if INFO = i, ZHEEV failed to converge; */ +/* > i off-diagonal elements of an intermediate */ +/* > tridiagonal form did not converge to zero; */ +/* > > N: if INFO = N + i, for 1 <= i <= N, then the leading */ +/* > minor of order i of B is not positive definite. */ +/* > The factorization of B could not be completed and */ +/* > no eigenvalues or eigenvectors were computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16HEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int zhegv_(integer *itype, char *jobz, char *uplo, integer * + n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublereal *w, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + integer neig; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zheev_(char *, char *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *, + integer *, doublereal *, integer *); + char trans[1]; + logical upper, wantz; + extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + ztrsm_(char *, char *, char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *); + integer nb; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zhegst_(integer *, char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int zpotrf_(char *, integer *, doublecomplex *, + integer *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --w; + --work; + --rwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + + *info = 0; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! (wantz || lsame_(jobz, "N"))) { + *info = -2; + } else if (! (upper || lsame_(uplo, "L"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); +/* Computing MAX */ + i__1 = 1, i__2 = (nb + 1) * *n; + lwkopt = f2cmax(i__1,i__2); + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + +/* Computing MAX */ + i__1 = 1, i__2 = (*n << 1) - 1; + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -11; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEGV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a Cholesky factorization of B. */ + + zpotrf_(uplo, n, &b[b_offset], ldb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem and solve. */ + + zhegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); + zheev_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &rwork[1] + , info); + + if (wantz) { + +/* Backtransform eigenvectors to the original problem. */ + + neig = *n; + if (*info > 0) { + neig = *info - 1; + } + if (*itype == 1 || *itype == 2) { + +/* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ +/* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y */ + + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'C'; + } + + ztrsm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b1, &b[ + b_offset], ldb, &a[a_offset], lda); + + } else if (*itype == 3) { + +/* For B*A*x=(lambda)*x; */ +/* backtransform eigenvectors: x = L*y or U**H *y */ + + if (upper) { + *(unsigned char *)trans = 'C'; + } else { + *(unsigned char *)trans = 'N'; + } + + ztrmm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b1, &b[ + b_offset], ldb, &a[a_offset], lda); + } + } + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + return 0; + +/* End of ZHEGV */ + +} /* zhegv_ */ + diff --git a/lapack-netlib/SRC/zhegv_2stage.c b/lapack-netlib/SRC/zhegv_2stage.c new file mode 100644 index 000000000..2d8409f75 --- /dev/null +++ b/lapack-netlib/SRC/zhegv_2stage.c @@ -0,0 +1,796 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHEGV_2STAGE */ + +/* @precisions fortran z -> c */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEGV_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, */ +/* WORK, LWORK, RWORK, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEGV_2STAGE computes all the eigenvalues, and optionally, the eigenvectors */ +/* > of a complex generalized Hermitian-definite eigenproblem, of the form */ +/* > A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. */ +/* > Here A and B are assumed to be Hermitian and B is also */ +/* > positive definite. */ +/* > This routine use the 2stage technique for the reduction to tridiagonal */ +/* > which showed higher performance on recent architecture and for large */ +/* > sizes N>2000. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITYPE */ +/* > \verbatim */ +/* > ITYPE is INTEGER */ +/* > Specifies the problem type to be solved: */ +/* > = 1: A*x = (lambda)*B*x */ +/* > = 2: A*B*x = (lambda)*x */ +/* > = 3: B*A*x = (lambda)*x */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > Not available in this release. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangles of A and B are stored; */ +/* > = 'L': Lower triangles of A and B are stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > matrix Z of eigenvectors. The eigenvectors are normalized */ +/* > as follows: */ +/* > if ITYPE = 1 or 2, Z**H*B*Z = I; */ +/* > if ITYPE = 3, Z**H*inv(B)*Z = I. */ +/* > If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */ +/* > or the lower triangle (if UPLO='L') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB, N) */ +/* > On entry, the Hermitian positive definite matrix B. */ +/* > If UPLO = 'U', the leading N-by-N upper triangular part of B */ +/* > contains the upper triangular part of the matrix B. */ +/* > If UPLO = 'L', the leading N-by-N lower triangular part of B */ +/* > contains the lower triangular part of the matrix B. */ +/* > */ +/* > On exit, if INFO <= N, the part of B containing the matrix is */ +/* > overwritten by the triangular factor U or L from the Cholesky */ +/* > factorization B = U**H*U or B = L*L**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1, when N <= 1; */ +/* > otherwise */ +/* > If JOBZ = 'N' and N > 1, LWORK must be queried. */ +/* > LWORK = MAX(1, dimension) where */ +/* > dimension = f2cmax(stage1,stage2) + (KD+1)*N + N */ +/* > = N*KD + N*f2cmax(KD+1,FACTOPTNB) */ +/* > + f2cmax(2*KD*KD, KD*NTHREADS) */ +/* > + (KD+1)*N + N */ +/* > where KD is the blocking size of the reduction, */ +/* > FACTOPTNB is the blocking used by the QR or LQ */ +/* > algorithm, usually FACTOPTNB=128 is a good choice */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (f2cmax(1, 3*N-2)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: ZPOTRF or ZHEEV returned an error code: */ +/* > <= N: if INFO = i, ZHEEV failed to converge; */ +/* > i off-diagonal elements of an intermediate */ +/* > tridiagonal form did not converge to zero; */ +/* > > N: if INFO = N + i, for 1 <= i <= N, then the leading */ +/* > minor of order i of B is not positive definite. */ +/* > The factorization of B could not be completed and */ +/* > no eigenvalues or eigenvectors were computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complex16HEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > All details about the 2stage techniques are available in: */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int zhegv_2stage_(integer *itype, char *jobz, char *uplo, + integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer + *ldb, doublereal *w, doublecomplex *work, integer *lwork, doublereal * + rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + integer neig; + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + extern /* Subroutine */ int zheev_2stage_(char *, char *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *, + integer *, doublereal *, integer *); + extern logical lsame_(char *, char *); + integer lhtrd, lwmin; + char trans[1]; + logical upper; + integer lwtrd; + logical wantz; + extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + ztrsm_(char *, char *, char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *); + integer ib, kd; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zhegst_( + integer *, char *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *); + logical lquery; + extern /* Subroutine */ int zpotrf_(char *, integer *, doublecomplex *, + integer *, integer *); + + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --w; + --work; + --rwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + + *info = 0; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! lsame_(jobz, "N")) { + *info = -2; + } else if (! (upper || lsame_(uplo, "L"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } + + if (*info == 0) { + kd = ilaenv2stage_(&c__1, "ZHETRD_2STAGE", jobz, n, &c_n1, &c_n1, & + c_n1); + ib = ilaenv2stage_(&c__2, "ZHETRD_2STAGE", jobz, n, &kd, &c_n1, &c_n1); + lhtrd = ilaenv2stage_(&c__3, "ZHETRD_2STAGE", jobz, n, &kd, &ib, & + c_n1); + lwtrd = ilaenv2stage_(&c__4, "ZHETRD_2STAGE", jobz, n, &kd, &ib, & + c_n1); + lwmin = *n + lhtrd + lwtrd; + work[1].r = (doublereal) lwmin, work[1].i = 0.; + + if (*lwork < lwmin && ! lquery) { + *info = -11; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEGV_2STAGE ", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a Cholesky factorization of B. */ + + zpotrf_(uplo, n, &b[b_offset], ldb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem and solve. */ + + zhegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); + zheev_2stage_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, & + rwork[1], info); + + if (wantz) { + +/* Backtransform eigenvectors to the original problem. */ + + neig = *n; + if (*info > 0) { + neig = *info - 1; + } + if (*itype == 1 || *itype == 2) { + +/* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ +/* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y */ + + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'C'; + } + + ztrsm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b1, &b[ + b_offset], ldb, &a[a_offset], lda); + + } else if (*itype == 3) { + +/* For B*A*x=(lambda)*x; */ +/* backtransform eigenvectors: x = L*y or U**H *y */ + + if (upper) { + *(unsigned char *)trans = 'C'; + } else { + *(unsigned char *)trans = 'N'; + } + + ztrmm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b1, &b[ + b_offset], ldb, &a[a_offset], lda); + } + } + + work[1].r = (doublereal) lwmin, work[1].i = 0.; + + return 0; + +/* End of ZHEGV_2STAGE */ + +} /* zhegv_2stage__ */ + diff --git a/lapack-netlib/SRC/zhegvd.c b/lapack-netlib/SRC/zhegvd.c new file mode 100644 index 000000000..60d035842 --- /dev/null +++ b/lapack-netlib/SRC/zhegvd.c @@ -0,0 +1,830 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHEGVD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEGVD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, */ +/* LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LRWORK, LWORK, N */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEGVD computes all the eigenvalues, and optionally, the eigenvectors */ +/* > of a complex generalized Hermitian-definite eigenproblem, of the form */ +/* > A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and */ +/* > B are assumed to be Hermitian and B is also positive definite. */ +/* > If eigenvectors are desired, it uses a divide and conquer algorithm. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITYPE */ +/* > \verbatim */ +/* > ITYPE is INTEGER */ +/* > Specifies the problem type to be solved: */ +/* > = 1: A*x = (lambda)*B*x */ +/* > = 2: A*B*x = (lambda)*x */ +/* > = 3: B*A*x = (lambda)*x */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangles of A and B are stored; */ +/* > = 'L': Lower triangles of A and B are stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > matrix Z of eigenvectors. The eigenvectors are normalized */ +/* > as follows: */ +/* > if ITYPE = 1 or 2, Z**H*B*Z = I; */ +/* > if ITYPE = 3, Z**H*inv(B)*Z = I. */ +/* > If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */ +/* > or the lower triangle (if UPLO='L') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB, N) */ +/* > On entry, the Hermitian matrix B. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of B contains the */ +/* > upper triangular part of the matrix B. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of B contains */ +/* > the lower triangular part of the matrix B. */ +/* > */ +/* > On exit, if INFO <= N, the part of B containing the matrix is */ +/* > overwritten by the triangular factor U or L from the Cholesky */ +/* > factorization B = U**H*U or B = L*L**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. */ +/* > If N <= 1, LWORK >= 1. */ +/* > If JOBZ = 'N' and N > 1, LWORK >= N + 1. */ +/* > If JOBZ = 'V' and N > 1, LWORK >= 2*N + N**2. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) */ +/* > On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The dimension of the array RWORK. */ +/* > If N <= 1, LRWORK >= 1. */ +/* > If JOBZ = 'N' and N > 1, LRWORK >= N. */ +/* > If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > If N <= 1, LIWORK >= 1. */ +/* > If JOBZ = 'N' and N > 1, LIWORK >= 1. */ +/* > If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: ZPOTRF or ZHEEVD returned an error code: */ +/* > <= N: if INFO = i and JOBZ = 'N', then the algorithm */ +/* > failed to converge; i off-diagonal elements of an */ +/* > intermediate tridiagonal form did not converge to */ +/* > zero; */ +/* > if INFO = i and JOBZ = 'V', then the algorithm */ +/* > failed to compute an eigenvalue while working on */ +/* > the submatrix lying in rows and columns INFO/(N+1) */ +/* > through mod(INFO,N+1); */ +/* > > N: if INFO = N + i, for 1 <= i <= N, then the leading */ +/* > minor of order i of B is not positive definite. */ +/* > The factorization of B could not be completed and */ +/* > no eigenvalues or eigenvectors were computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16HEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Modified so that no backsubstitution is performed if ZHEEVD fails to */ +/* > converge (NEIG in old code could be greater than N causing out of */ +/* > bounds reference to A - reported by Ralf Meyer). Also corrected the */ +/* > description of INFO and the test on ITYPE. Sven, 16 Feb 05. */ +/* > \endverbatim */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zhegvd_(integer *itype, char *jobz, char *uplo, integer * + n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublereal *w, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *lrwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + integer lopt; + extern logical lsame_(char *, char *); + integer lwmin; + char trans[1]; + integer liopt; + logical upper; + integer lropt; + logical wantz; + extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + ztrsm_(char *, char *, char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *), xerbla_(char *, + integer *, ftnlen), zheevd_(char *, char *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *, + integer *, doublereal *, integer *, integer *, integer *, integer + *); + integer liwmin; + extern /* Subroutine */ int zhegst_(integer *, char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + integer lrwmin; + logical lquery; + extern /* Subroutine */ int zpotrf_(char *, integer *, doublecomplex *, + integer *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --w; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + *info = 0; + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + } else if (wantz) { + lwmin = (*n << 1) + *n * *n; + lrwmin = *n * 5 + 1 + (*n << 1) * *n; + liwmin = *n * 5 + 3; + } else { + lwmin = *n + 1; + lrwmin = *n; + liwmin = 1; + } + lopt = lwmin; + lropt = lrwmin; + liopt = liwmin; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! (wantz || lsame_(jobz, "N"))) { + *info = -2; + } else if (! (upper || lsame_(uplo, "L"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } + + if (*info == 0) { + work[1].r = (doublereal) lopt, work[1].i = 0.; + rwork[1] = (doublereal) lropt; + iwork[1] = liopt; + + if (*lwork < lwmin && ! lquery) { + *info = -11; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -13; + } else if (*liwork < liwmin && ! lquery) { + *info = -15; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEGVD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a Cholesky factorization of B. */ + + zpotrf_(uplo, n, &b[b_offset], ldb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem and solve. */ + + zhegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); + zheevd_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &rwork[ + 1], lrwork, &iwork[1], liwork, info); +/* Computing MAX */ + d__1 = (doublereal) lopt, d__2 = work[1].r; + lopt = (integer) f2cmax(d__1,d__2); +/* Computing MAX */ + d__1 = (doublereal) lropt; + lropt = (integer) f2cmax(d__1,rwork[1]); +/* Computing MAX */ + d__1 = (doublereal) liopt, d__2 = (doublereal) iwork[1]; + liopt = (integer) f2cmax(d__1,d__2); + + if (wantz && *info == 0) { + +/* Backtransform eigenvectors to the original problem. */ + + if (*itype == 1 || *itype == 2) { + +/* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ +/* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y */ + + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'C'; + } + + ztrsm_("Left", uplo, trans, "Non-unit", n, n, &c_b1, &b[b_offset], + ldb, &a[a_offset], lda); + + } else if (*itype == 3) { + +/* For B*A*x=(lambda)*x; */ +/* backtransform eigenvectors: x = L*y or U**H *y */ + + if (upper) { + *(unsigned char *)trans = 'C'; + } else { + *(unsigned char *)trans = 'N'; + } + + ztrmm_("Left", uplo, trans, "Non-unit", n, n, &c_b1, &b[b_offset], + ldb, &a[a_offset], lda); + } + } + + work[1].r = (doublereal) lopt, work[1].i = 0.; + rwork[1] = (doublereal) lropt; + iwork[1] = liopt; + + return 0; + +/* End of ZHEGVD */ + +} /* zhegvd_ */ + diff --git a/lapack-netlib/SRC/zhegvx.c b/lapack-netlib/SRC/zhegvx.c new file mode 100644 index 000000000..80833cba4 --- /dev/null +++ b/lapack-netlib/SRC/zhegvx.c @@ -0,0 +1,896 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHEGVX */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEGVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, */ +/* VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, */ +/* LWORK, RWORK, IWORK, IFAIL, INFO ) */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N */ +/* DOUBLE PRECISION ABSTOL, VL, VU */ +/* INTEGER IFAIL( * ), IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ), */ +/* $ Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEGVX computes selected eigenvalues, and optionally, eigenvectors */ +/* > of a complex generalized Hermitian-definite eigenproblem, of the form */ +/* > A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and */ +/* > B are assumed to be Hermitian and B is also positive definite. */ +/* > Eigenvalues and eigenvectors can be selected by specifying either a */ +/* > range of values or a range of indices for the desired eigenvalues. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITYPE */ +/* > \verbatim */ +/* > ITYPE is INTEGER */ +/* > Specifies the problem type to be solved: */ +/* > = 1: A*x = (lambda)*B*x */ +/* > = 2: A*B*x = (lambda)*x */ +/* > = 3: B*A*x = (lambda)*x */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all eigenvalues will be found. */ +/* > = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* > will be found. */ +/* > = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangles of A and B are stored; */ +/* > = 'L': Lower triangles of A and B are stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > */ +/* > On exit, the lower triangle (if UPLO='L') or the upper */ +/* > triangle (if UPLO='U') of A, including the diagonal, is */ +/* > destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB, N) */ +/* > On entry, the Hermitian matrix B. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of B contains the */ +/* > upper triangular part of the matrix B. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of B contains */ +/* > the lower triangular part of the matrix B. */ +/* > */ +/* > On exit, if INFO <= N, the part of B containing the matrix is */ +/* > overwritten by the triangular factor U or L from the Cholesky */ +/* > factorization B = U**H*U or B = L*L**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is DOUBLE PRECISION */ +/* > */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is DOUBLE PRECISION */ +/* > */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > */ +/* > If RANGE='I', the index of the */ +/* > smallest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > */ +/* > If RANGE='I', the index of the */ +/* > largest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ABSTOL */ +/* > \verbatim */ +/* > ABSTOL is DOUBLE PRECISION */ +/* > The absolute error tolerance for the eigenvalues. */ +/* > An approximate eigenvalue is accepted as converged */ +/* > when it is determined to lie in an interval [a,b] */ +/* > of width less than or equal to */ +/* > */ +/* > ABSTOL + EPS * f2cmax( |a|,|b| ) , */ +/* > */ +/* > where EPS is the machine precision. If ABSTOL is less than */ +/* > or equal to zero, then EPS*|T| will be used in its place, */ +/* > where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* > by reducing C to tridiagonal form, where C is the symmetric */ +/* > matrix of the standard symmetric problem to which the */ +/* > generalized problem is transformed. */ +/* > */ +/* > Eigenvalues will be computed most accurately when ABSTOL is */ +/* > set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ +/* > If this routine returns with INFO>0, indicating that some */ +/* > eigenvectors did not converge, try setting ABSTOL to */ +/* > 2*DLAMCH('S'). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The total number of eigenvalues found. 0 <= M <= N. */ +/* > If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > The first M elements contain the selected */ +/* > eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, f2cmax(1,M)) */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* > contain the orthonormal eigenvectors of the matrix A */ +/* > corresponding to the selected eigenvalues, with the i-th */ +/* > column of Z holding the eigenvector associated with W(i). */ +/* > The eigenvectors are normalized as follows: */ +/* > if ITYPE = 1 or 2, Z**T*B*Z = I; */ +/* > if ITYPE = 3, Z**T*inv(B)*Z = I. */ +/* > */ +/* > If an eigenvector fails to converge, then that column of Z */ +/* > contains the latest approximation to the eigenvector, and the */ +/* > index of the eigenvector is returned in IFAIL. */ +/* > Note: the user must ensure that at least f2cmax(1,M) columns are */ +/* > supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* > is not known in advance and an upper bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= f2cmax(1,2*N). */ +/* > For optimal efficiency, LWORK >= (NB+1)*N, */ +/* > where NB is the blocksize for ZHETRD returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (7*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (5*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IFAIL */ +/* > \verbatim */ +/* > IFAIL is INTEGER array, dimension (N) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* > IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* > indices of the eigenvectors that failed to converge. */ +/* > If JOBZ = 'N', then IFAIL is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: ZPOTRF or ZHEEVX returned an error code: */ +/* > <= N: if INFO = i, ZHEEVX failed to converge; */ +/* > i eigenvectors failed to converge. Their indices */ +/* > are stored in array IFAIL. */ +/* > > N: if INFO = N + i, for 1 <= i <= N, then the leading */ +/* > minor of order i of B is not positive definite. */ +/* > The factorization of B could not be completed and */ +/* > no eigenvalues or eigenvectors were computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complex16HEeigen */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ + +/* ===================================================================== */ +/* Subroutine */ int zhegvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, + integer *ldb, doublereal *vl, doublereal *vu, integer *il, integer * + iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__, + integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, + integer *iwork, integer *ifail, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, z_dim1, z_offset, i__1, i__2; + + /* Local variables */ + extern logical lsame_(char *, char *); + char trans[1]; + logical upper, wantz; + extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + ztrsm_(char *, char *, char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *); + integer nb; + logical alleig, indeig, valeig; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zhegst_(integer *, char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *), zheevx_(char *, char *, char *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, integer *, + integer *, doublereal *, integer *, doublereal *, doublecomplex * + , integer *, doublecomplex *, integer *, doublereal *, integer *, + integer *, integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int zpotrf_(char *, integer *, doublecomplex *, + integer *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + --ifail; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + upper = lsame_(uplo, "U"); + alleig = lsame_(range, "A"); + valeig = lsame_(range, "V"); + indeig = lsame_(range, "I"); + lquery = *lwork == -1; + + *info = 0; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! (wantz || lsame_(jobz, "N"))) { + *info = -2; + } else if (! (alleig || valeig || indeig)) { + *info = -3; + } else if (! (upper || lsame_(uplo, "L"))) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -11; + } + } else if (indeig) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -12; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -13; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -18; + } + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); +/* Computing MAX */ + i__1 = 1, i__2 = (nb + 1) * *n; + lwkopt = f2cmax(i__1,i__2); + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -20; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEGVX", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + +/* Form a Cholesky factorization of B. */ + + zpotrf_(uplo, n, &b[b_offset], ldb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem and solve. */ + + zhegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); + zheevx_(jobz, range, uplo, n, &a[a_offset], lda, vl, vu, il, iu, abstol, + m, &w[1], &z__[z_offset], ldz, &work[1], lwork, &rwork[1], &iwork[ + 1], &ifail[1], info); + + if (wantz) { + +/* Backtransform eigenvectors to the original problem. */ + + if (*info > 0) { + *m = *info - 1; + } + if (*itype == 1 || *itype == 2) { + +/* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ +/* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y */ + + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'C'; + } + + ztrsm_("Left", uplo, trans, "Non-unit", n, m, &c_b1, &b[b_offset], + ldb, &z__[z_offset], ldz); + + } else if (*itype == 3) { + +/* For B*A*x=(lambda)*x; */ +/* backtransform eigenvectors: x = L*y or U**H *y */ + + if (upper) { + *(unsigned char *)trans = 'C'; + } else { + *(unsigned char *)trans = 'N'; + } + + ztrmm_("Left", uplo, trans, "Non-unit", n, m, &c_b1, &b[b_offset], + ldb, &z__[z_offset], ldz); + } + } + +/* Set WORK(1) to optimal complex workspace size. */ + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + return 0; + +/* End of ZHEGVX */ + +} /* zhegvx_ */ + diff --git a/lapack-netlib/SRC/zherfs.c b/lapack-netlib/SRC/zherfs.c new file mode 100644 index 000000000..200875497 --- /dev/null +++ b/lapack-netlib/SRC/zherfs.c @@ -0,0 +1,926 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHERFS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHERFS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, */ +/* X, LDX, FERR, BERR, WORK, RWORK, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) */ +/* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHERFS improves the computed solution to a system of linear */ +/* > equations when the coefficient matrix is Hermitian indefinite, and */ +/* > provides error bounds and backward error estimates for the solution. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > The Hermitian matrix A. If UPLO = 'U', the leading N-by-N */ +/* > upper triangular part of A contains the upper triangular part */ +/* > of the matrix A, and the strictly lower triangular part of A */ +/* > is not referenced. If UPLO = 'L', the leading N-by-N lower */ +/* > triangular part of A contains the lower triangular part of */ +/* > the matrix A, and the strictly upper triangular part of A is */ +/* > not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AF */ +/* > \verbatim */ +/* > AF is COMPLEX*16 array, dimension (LDAF,N) */ +/* > The factored form of the matrix A. AF contains the block */ +/* > diagonal matrix D and the multipliers used to obtain the */ +/* > factor U or L from the factorization A = U*D*U**H or */ +/* > A = L*D*L**H as computed by ZHETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D */ +/* > as determined by ZHETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > The right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension (LDX,NRHS) */ +/* > On entry, the solution matrix X, as computed by ZHETRS. */ +/* > On exit, the improved solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > ITMAX is the maximum number of steps of iterative refinement. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16HEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zherfs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, + integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, + integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work, + doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1; + + /* Local variables */ + integer kase; + doublereal safe1, safe2; + integer i__, j, k; + doublereal s; + extern logical lsame_(char *, char *); + integer isave[3], count; + extern /* Subroutine */ int zhemv_(char *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *); + logical upper; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zaxpy_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), zlacn2_( + integer *, doublecomplex *, doublecomplex *, doublereal *, + integer *, integer *); + extern doublereal dlamch_(char *); + doublereal xk; + integer nz; + doublereal safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal lstres; + extern /* Subroutine */ int zhetrs_(char *, integer *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + integer *); + doublereal eps; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --rwork; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -10; + } else if (*ldx < f2cmax(1,*n)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHERFS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.; + berr[j] = 0.; +/* L10: */ + } + return 0; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = *n + 1; + eps = dlamch_("Epsilon"); + safmin = dlamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.; +L20: + +/* Loop until stopping criterion is satisfied. */ + +/* Compute residual R = B - A * X */ + + zcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[1], &c__1); + z__1.r = -1., z__1.i = 0.; + zhemv_(uplo, n, &z__1, &a[a_offset], lda, &x[j * x_dim1 + 1], &c__1, & + c_b1, &work[1], &c__1); + +/* Compute componentwise relative backward error from formula */ + +/* f2cmax(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + rwork[i__] = (d__1 = b[i__3].r, abs(d__1)) + (d__2 = d_imag(&b[ + i__ + j * b_dim1]), abs(d__2)); +/* L30: */ + } + +/* Compute abs(A)*abs(X) + abs(B). */ + + if (upper) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + i__3 = k + j * x_dim1; + xk = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[k + j * + x_dim1]), abs(d__2)); + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + k * a_dim1; + rwork[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = + d_imag(&a[i__ + k * a_dim1]), abs(d__2))) * xk; + i__4 = i__ + k * a_dim1; + i__5 = i__ + j * x_dim1; + s += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[ + i__ + k * a_dim1]), abs(d__2))) * ((d__3 = x[i__5] + .r, abs(d__3)) + (d__4 = d_imag(&x[i__ + j * + x_dim1]), abs(d__4))); +/* L40: */ + } + i__3 = k + k * a_dim1; + rwork[k] = rwork[k] + (d__1 = a[i__3].r, abs(d__1)) * xk + s; +/* L50: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.; + i__3 = k + j * x_dim1; + xk = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[k + j * + x_dim1]), abs(d__2)); + i__3 = k + k * a_dim1; + rwork[k] += (d__1 = a[i__3].r, abs(d__1)) * xk; + i__3 = *n; + for (i__ = k + 1; i__ <= i__3; ++i__) { + i__4 = i__ + k * a_dim1; + rwork[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = + d_imag(&a[i__ + k * a_dim1]), abs(d__2))) * xk; + i__4 = i__ + k * a_dim1; + i__5 = i__ + j * x_dim1; + s += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[ + i__ + k * a_dim1]), abs(d__2))) * ((d__3 = x[i__5] + .r, abs(d__3)) + (d__4 = d_imag(&x[i__ + j * + x_dim1]), abs(d__4))); +/* L60: */ + } + rwork[k] += s; +/* L70: */ + } + } + s = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (rwork[i__] > safe2) { +/* Computing MAX */ + i__3 = i__; + d__3 = s, d__4 = ((d__1 = work[i__3].r, abs(d__1)) + (d__2 = + d_imag(&work[i__]), abs(d__2))) / rwork[i__]; + s = f2cmax(d__3,d__4); + } else { +/* Computing MAX */ + i__3 = i__; + d__3 = s, d__4 = ((d__1 = work[i__3].r, abs(d__1)) + (d__2 = + d_imag(&work[i__]), abs(d__2)) + safe1) / (rwork[i__] + + safe1); + s = f2cmax(d__3,d__4); + } +/* L80: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried. */ + + if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { + +/* Update solution and try again. */ + + zhetrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[1], + n, info); + zaxpy_(n, &c_b1, &work[1], &c__1, &x[j * x_dim1 + 1], &c__1); + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(A))* */ +/* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(A) is the inverse of A */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(A)*abs(X) + abs(B) is less than SAFE2. */ + +/* Use ZLACN2 to estimate the infinity-norm of the matrix */ +/* inv(A) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (rwork[i__] > safe2) { + i__3 = i__; + rwork[i__] = (d__1 = work[i__3].r, abs(d__1)) + (d__2 = + d_imag(&work[i__]), abs(d__2)) + nz * eps * rwork[i__] + ; + } else { + i__3 = i__; + rwork[i__] = (d__1 = work[i__3].r, abs(d__1)) + (d__2 = + d_imag(&work[i__]), abs(d__2)) + nz * eps * rwork[i__] + + safe1; + } +/* L90: */ + } + + kase = 0; +L100: + zlacn2_(n, &work[*n + 1], &work[1], &ferr[j], &kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(A**H). */ + + zhetrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[ + 1], n, info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__; + z__1.r = rwork[i__4] * work[i__5].r, z__1.i = rwork[i__4] + * work[i__5].i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; +/* L110: */ + } + } else if (kase == 2) { + +/* Multiply by inv(A)*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__; + z__1.r = rwork[i__4] * work[i__5].r, z__1.i = rwork[i__4] + * work[i__5].i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; +/* L120: */ + } + zhetrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[ + 1], n, info); + } + goto L100; + } + +/* Normalize error. */ + + lstres = 0.; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + i__3 = i__ + j * x_dim1; + d__3 = lstres, d__4 = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = + d_imag(&x[i__ + j * x_dim1]), abs(d__2)); + lstres = f2cmax(d__3,d__4); +/* L130: */ + } + if (lstres != 0.) { + ferr[j] /= lstres; + } + +/* L140: */ + } + + return 0; + +/* End of ZHERFS */ + +} /* zherfs_ */ + diff --git a/lapack-netlib/SRC/zherfsx.c b/lapack-netlib/SRC/zherfsx.c new file mode 100644 index 000000000..e8dddda2b --- /dev/null +++ b/lapack-netlib/SRC/zherfsx.c @@ -0,0 +1,381 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHESV computes the solution to system of linear equations A * X = B for HE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHESV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHESV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, */ +/* LWORK, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LDB, LWORK, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHESV computes the solution to a complex system of linear equations */ +/* > A * X = B, */ +/* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */ +/* > matrices. */ +/* > */ +/* > The diagonal pivoting method is used to factor A as */ +/* > A = U * D * U**H, if UPLO = 'U', or */ +/* > A = L * D * L**H, if UPLO = 'L', */ +/* > where U (or L) is a product of permutation and unit upper (lower) */ +/* > triangular matrices, and D is Hermitian and block diagonal with */ +/* > 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then */ +/* > used to solve the system of equations A * X = B. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if INFO = 0, the block diagonal matrix D and the */ +/* > multipliers used to obtain the factor U or L from the */ +/* > factorization A = U*D*U**H or A = L*D*L**H as computed by */ +/* > ZHETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D, as */ +/* > determined by ZHETRF. If IPIV(k) > 0, then rows and columns */ +/* > k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 */ +/* > diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, */ +/* > then rows and columns k-1 and -IPIV(k) were interchanged and */ +/* > D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and */ +/* > IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and */ +/* > -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 */ +/* > diagonal block. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of WORK. LWORK >= 1, and for best performance */ +/* > LWORK >= f2cmax(1,N*NB), where NB is the optimal blocksize for */ +/* > ZHETRF. */ +/* > for LWORK < N, TRS will be done with Level BLAS 2 */ +/* > for LWORK >= N, TRS will be done with Level BLAS 3 */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, D(i,i) is exactly zero. The factorization */ +/* > has been completed, but the block diagonal matrix D is */ +/* > exactly singular, so the solution could not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16HEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int zhesv_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, + integer *ldb, doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + integer nb; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zhetrf_(char *, integer *, doublecomplex *, + integer *, integer *, doublecomplex *, integer *, integer *), zhetrs_(char *, integer *, integer *, doublecomplex *, + integer *, integer *, doublecomplex *, integer *, integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int zhetrs2_(char *, integer *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } else if (*lwork < 1 && ! lquery) { + *info = -10; + } + + if (*info == 0) { + if (*n == 0) { + lwkopt = 1; + } else { + nb = ilaenv_(&c__1, "ZHETRF", uplo, n, &c_n1, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + lwkopt = *n * nb; + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHESV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Compute the factorization A = U*D*U**H or A = L*D*L**H. */ + + zhetrf_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + if (*lwork < *n) { + +/* Solve with TRS ( Use Level BLAS 2) */ + + zhetrs_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], + ldb, info); + + } else { + +/* Solve with TRS2 ( Use Level BLAS 3) */ + + zhetrs2_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], + ldb, &work[1], info); + + } + + } + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + return 0; + +/* End of ZHESV */ + +} /* zhesv_ */ + diff --git a/lapack-netlib/SRC/zhesv_aa.c b/lapack-netlib/SRC/zhesv_aa.c new file mode 100644 index 000000000..884190483 --- /dev/null +++ b/lapack-netlib/SRC/zhesv_aa.c @@ -0,0 +1,651 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHESV_AA computes the solution to system of linear equations A * X = B for HE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHESV_AA + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHESV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, */ +/* LWORK, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LDB, LWORK, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHESV_AA computes the solution to a complex system of linear equations */ +/* > A * X = B, */ +/* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */ +/* > matrices. */ +/* > */ +/* > Aasen's algorithm is used to factor A as */ +/* > A = U**H * T * U, if UPLO = 'U', or */ +/* > A = L * T * L**H, if UPLO = 'L', */ +/* > where U (or L) is a product of permutation and unit upper (lower) */ +/* > triangular matrices, and T is Hermitian and tridiagonal. The factored form */ +/* > of A is then used to solve the system of equations A * X = B. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if INFO = 0, the tridiagonal matrix T and the */ +/* > multipliers used to obtain the factor U or L from the */ +/* > factorization A = U**H*T*U or A = L*T*L**H as computed by */ +/* > ZHETRF_AA. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > On exit, it contains the details of the interchanges, i.e., */ +/* > the row and column k of A were interchanged with the */ +/* > row and column IPIV(k). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of WORK. LWORK >= MAX(1,2*N,3*N-2), and for best */ +/* > performance LWORK >= f2cmax(1,N*NB), where NB is the optimal */ +/* > blocksize for ZHETRF. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, D(i,i) is exactly zero. The factorization */ +/* > has been completed, but the block diagonal matrix D is */ +/* > exactly singular, so the solution could not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complex16HEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int zhesv_aa_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, + integer *ldb, doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + integer lwkopt_hetrf__, lwkopt_hetrs__; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zhetrf_aa_(char *, integer *, doublecomplex * + , integer *, integer *, doublecomplex *, integer *, integer *), zhetrs_aa_(char *, integer *, integer *, doublecomplex * + , integer *, integer *, doublecomplex *, integer *, doublecomplex + *, integer *, integer *), xerbla_(char *, integer *, ftnlen); + integer lwkopt; + logical lquery; + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *n << 1, i__2 = *n * 3 - 2; + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -10; + } + } + + if (*info == 0) { + zhetrf_aa_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], &c_n1, + info); + lwkopt_hetrf__ = (integer) work[1].r; + zhetrs_aa_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], + ldb, &work[1], &c_n1, info); + lwkopt_hetrs__ = (integer) work[1].r; + lwkopt = f2cmax(lwkopt_hetrf__,lwkopt_hetrs__); + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHESV_AA ", &i__1, (ftnlen)9); + return 0; + } else if (lquery) { + return 0; + } + +/* Compute the factorization A = U**H*T*U or A = L*T*L**H. */ + + zhetrf_aa_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + zhetrs_aa_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], + ldb, &work[1], lwork, info); + + } + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + return 0; + +/* End of ZHESV_AA */ + +} /* zhesv_aa__ */ + diff --git a/lapack-netlib/SRC/zhesv_aa_2stage.c b/lapack-netlib/SRC/zhesv_aa_2stage.c new file mode 100644 index 000000000..b37a875d8 --- /dev/null +++ b/lapack-netlib/SRC/zhesv_aa_2stage.c @@ -0,0 +1,679 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHESV_AA_2STAGE computes the solution to system of linear equations A * X = B for HE matrices + */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHESV_AA_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHESV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, */ +/* IPIV, IPIV2, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER N, NRHS, LDA, LTB, LDB, LWORK, INFO */ +/* INTEGER IPIV( * ), IPIV2( * ) */ +/* COMPLEX*16 A( LDA, * ), TB( * ), B( LDB, *), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHESV_AA_2STAGE computes the solution to a complex system of */ +/* > linear equations */ +/* > A * X = B, */ +/* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */ +/* > matrices. */ +/* > */ +/* > Aasen's 2-stage algorithm is used to factor A as */ +/* > A = U**H * T * U, if UPLO = 'U', or */ +/* > A = L * T * L**H, if UPLO = 'L', */ +/* > where U (or L) is a product of permutation and unit upper (lower) */ +/* > triangular matrices, and T is Hermitian and band. The matrix T is */ +/* > then LU-factored with partial pivoting. The factored form of A */ +/* > is then used to solve the system of equations A * X = B. */ +/* > */ +/* > This is the blocked version of the algorithm, calling Level 3 BLAS. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, L is stored below (or above) the subdiaonal blocks, */ +/* > when UPLO is 'L' (or 'U'). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TB */ +/* > \verbatim */ +/* > TB is COMPLEX*16 array, dimension (LTB) */ +/* > On exit, details of the LU factorization of the band matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LTB */ +/* > \verbatim */ +/* > LTB is INTEGER */ +/* > The size of the array TB. LTB >= 4*N, internally */ +/* > used to select NB such that LTB >= (3*NB+1)*N. */ +/* > */ +/* > If LTB = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal size of LTB, */ +/* > returns this value as the first entry of TB, and */ +/* > no error message related to LTB is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > On exit, it contains the details of the interchanges, i.e., */ +/* > the row and column k of A were interchanged with the */ +/* > row and column IPIV(k). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV2 */ +/* > \verbatim */ +/* > IPIV2 is INTEGER array, dimension (N) */ +/* > On exit, it contains the details of the interchanges, i.e., */ +/* > the row and column k of T were interchanged with the */ +/* > row and column IPIV(k). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the right hand side matrix B. */ +/* > On exit, the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 workspace of size LWORK */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The size of WORK. LWORK >= N, internally used to select NB */ +/* > such that LWORK >= N*NB. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal size of the WORK array, */ +/* > returns this value as the first entry of the WORK array, and */ +/* > no error message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, band LU factorization failed on i-th column */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complex16HEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int zhesv_aa_2stage_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *tb, integer *ltb, + integer *ipiv, integer *ipiv2, doublecomplex *b, integer *ldb, + doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int zhetrf_aa_2stage_(char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *, + integer *, doublecomplex *, integer *, integer *), + zhetrs_aa_2stage_(char *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, integer *, integer *, + doublecomplex *, integer *, integer *); + extern logical lsame_(char *, char *); + logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer lwkopt; + logical tquery, wquery; + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tb; + --ipiv; + --ipiv2; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + wquery = *lwork == -1; + tquery = *ltb == -1; + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ltb < *n << 2 && ! tquery) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -11; + } else if (*lwork < *n && ! wquery) { + *info = -13; + } + + if (*info == 0) { + zhetrf_aa_2stage_(uplo, n, &a[a_offset], lda, &tb[1], &c_n1, &ipiv[1] + , &ipiv2[1], &work[1], &c_n1, info); + lwkopt = (integer) work[1].r; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHESV_AA_2STAGE", &i__1, (ftnlen)15); + return 0; + } else if (wquery || tquery) { + return 0; + } + +/* Compute the factorization A = U**H*T*U or A = L*T*L**H. */ + + zhetrf_aa_2stage_(uplo, n, &a[a_offset], lda, &tb[1], ltb, &ipiv[1], & + ipiv2[1], &work[1], lwork, info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + zhetrs_aa_2stage_(uplo, n, nrhs, &a[a_offset], lda, &tb[1], ltb, & + ipiv[1], &ipiv2[1], &b[b_offset], ldb, info); + + } + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + return 0; + +/* End of ZHESV_AA_2STAGE */ + +} /* zhesv_aa_2stage__ */ + diff --git a/lapack-netlib/SRC/zhesv_rk.c b/lapack-netlib/SRC/zhesv_rk.c new file mode 100644 index 000000000..9624784e0 --- /dev/null +++ b/lapack-netlib/SRC/zhesv_rk.c @@ -0,0 +1,719 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHESV_RK computes the solution to system of linear equations A * X = B for SY matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHESV_RK + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHESV_RK( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, */ +/* WORK, LWORK, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LDB, LWORK, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), E( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > ZHESV_RK computes the solution to a complex system of linear */ +/* > equations A * X = B, where A is an N-by-N Hermitian matrix */ +/* > and X and B are N-by-NRHS matrices. */ +/* > */ +/* > The bounded Bunch-Kaufman (rook) diagonal pivoting method is used */ +/* > to factor A as */ +/* > A = P*U*D*(U**H)*(P**T), if UPLO = 'U', or */ +/* > A = P*L*D*(L**H)*(P**T), if UPLO = 'L', */ +/* > where U (or L) is unit upper (or lower) triangular matrix, */ +/* > U**H (or L**H) is the conjugate of U (or L), P is a permutation */ +/* > matrix, P**T is the transpose of P, and D is Hermitian and block */ +/* > diagonal with 1-by-1 and 2-by-2 diagonal blocks. */ +/* > */ +/* > ZHETRF_RK is called to compute the factorization of a complex */ +/* > Hermitian matrix. The factored form of A is then used to solve */ +/* > the system of equations A * X = B by calling BLAS3 routine ZHETRS_3. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > Hermitian matrix A is stored: */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. */ +/* > If UPLO = 'U': the leading N-by-N upper triangular part */ +/* > of A contains the upper triangular part of the matrix A, */ +/* > and the strictly lower triangular part of A is not */ +/* > referenced. */ +/* > */ +/* > If UPLO = 'L': the leading N-by-N lower triangular part */ +/* > of A contains the lower triangular part of the matrix A, */ +/* > and the strictly upper triangular part of A is not */ +/* > referenced. */ +/* > */ +/* > On exit, if INFO = 0, diagonal of the block diagonal */ +/* > matrix D and factors U or L as computed by ZHETRF_RK: */ +/* > a) ONLY diagonal elements of the Hermitian block diagonal */ +/* > matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); */ +/* > (superdiagonal (or subdiagonal) elements of D */ +/* > are stored on exit in array E), and */ +/* > b) If UPLO = 'U': factor U in the superdiagonal part of A. */ +/* > If UPLO = 'L': factor L in the subdiagonal part of A. */ +/* > */ +/* > For more info see the description of ZHETRF_RK routine. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is COMPLEX*16 array, dimension (N) */ +/* > On exit, contains the output computed by the factorization */ +/* > routine ZHETRF_RK, i.e. the superdiagonal (or subdiagonal) */ +/* > elements of the Hermitian block diagonal matrix D */ +/* > with 1-by-1 or 2-by-2 diagonal blocks, where */ +/* > If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; */ +/* > If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. */ +/* > */ +/* > NOTE: For 1-by-1 diagonal block D(k), where */ +/* > 1 <= k <= N, the element E(k) is set to 0 in both */ +/* > UPLO = 'U' or UPLO = 'L' cases. */ +/* > */ +/* > For more info see the description of ZHETRF_RK routine. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D, */ +/* > as determined by ZHETRF_RK. */ +/* > */ +/* > For more info see the description of ZHETRF_RK routine. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension ( MAX(1,LWORK) ). */ +/* > Work array used in the factorization stage. */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of WORK. LWORK >= 1. For best performance */ +/* > of factorization stage LWORK >= f2cmax(1,N*NB), where NB is */ +/* > the optimal blocksize for ZHETRF_RK. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; */ +/* > the routine only calculates the optimal size of the WORK */ +/* > array for factorization stage, returns this value as */ +/* > the first entry of the WORK array, and no error message */ +/* > related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > */ +/* > < 0: If INFO = -k, the k-th argument had an illegal value */ +/* > */ +/* > > 0: If INFO = k, the matrix A is singular, because: */ +/* > If UPLO = 'U': column k in the upper */ +/* > triangular part of A contains all zeros. */ +/* > If UPLO = 'L': column k in the lower */ +/* > triangular part of A contains all zeros. */ +/* > */ +/* > Therefore D(k,k) is exactly zero, and superdiagonal */ +/* > elements of column k of U (or subdiagonal elements of */ +/* > column k of L ) are all zeros. The factorization has */ +/* > been completed, but the block diagonal matrix D is */ +/* > exactly singular, and division by zero will occur if */ +/* > it is used to solve a system of equations. */ +/* > */ +/* > NOTE: INFO only stores the first occurrence of */ +/* > a singularity, any subsequent occurrence of singularity */ +/* > is not stored in INFO even though the factorization */ +/* > always completes. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16HEsolve */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > December 2016, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > */ +/* > September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */ +/* > School of Mathematics, */ +/* > University of Manchester */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int zhesv_rk_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *e, integer *ipiv, + doublecomplex *b, integer *ldb, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int zhetrs_3_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zhetrf_rk_(char *, integer *, doublecomplex * + , integer *, doublecomplex *, integer *, doublecomplex *, integer + *, integer *), xerbla_(char *, integer *, ftnlen); + integer lwkopt; + logical lquery; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --e; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*lwork < 1 && ! lquery) { + *info = -11; + } + + if (*info == 0) { + if (*n == 0) { + lwkopt = 1; + } else { + zhetrf_rk_(uplo, n, &a[a_offset], lda, &e[1], &ipiv[1], &work[1], + &c_n1, info); + lwkopt = (integer) work[1].r; + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHESV_RK ", &i__1, (ftnlen)9); + return 0; + } else if (lquery) { + return 0; + } + +/* Compute the factorization A = P*U*D*(U**H)*(P**T) or */ +/* A = P*U*D*(U**H)*(P**T). */ + + zhetrf_rk_(uplo, n, &a[a_offset], lda, &e[1], &ipiv[1], &work[1], lwork, + info); + + if (*info == 0) { + +/* Solve the system A*X = B with BLAS3 solver, overwriting B with X. */ + + zhetrs_3_(uplo, n, nrhs, &a[a_offset], lda, &e[1], &ipiv[1], &b[ + b_offset], ldb, info); + + } + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + return 0; + +/* End of ZHESV_RK */ + +} /* zhesv_rk__ */ + diff --git a/lapack-netlib/SRC/zhesv_rook.c b/lapack-netlib/SRC/zhesv_rook.c new file mode 100644 index 000000000..eeef98d29 --- /dev/null +++ b/lapack-netlib/SRC/zhesv_rook.c @@ -0,0 +1,699 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHESV_ROOK computes the solution to a system of linear equations A * X = B for HE matrices usin +g the bounded Bunch-Kaufman ("rook") diagonal pivoting method */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHESV_ROOK + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHESV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, */ +/* LWORK, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LDB, LWORK, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHESV_ROOK computes the solution to a complex system of linear equations */ +/* > A * X = B, */ +/* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */ +/* > matrices. */ +/* > */ +/* > The bounded Bunch-Kaufman ("rook") diagonal pivoting method is used */ +/* > to factor A as */ +/* > A = U * D * U**T, if UPLO = 'U', or */ +/* > A = L * D * L**T, if UPLO = 'L', */ +/* > where U (or L) is a product of permutation and unit upper (lower) */ +/* > triangular matrices, and D is Hermitian and block diagonal with */ +/* > 1-by-1 and 2-by-2 diagonal blocks. */ +/* > */ +/* > ZHETRF_ROOK is called to compute the factorization of a complex */ +/* > Hermition matrix A using the bounded Bunch-Kaufman ("rook") diagonal */ +/* > pivoting method. */ +/* > */ +/* > The factored form of A is then used to solve the system */ +/* > of equations A * X = B by calling ZHETRS_ROOK (uses BLAS 2). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if INFO = 0, the block diagonal matrix D and the */ +/* > multipliers used to obtain the factor U or L from the */ +/* > factorization A = U*D*U**H or A = L*D*L**H as computed by */ +/* > ZHETRF_ROOK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D. */ +/* > */ +/* > If UPLO = 'U': */ +/* > Only the last KB elements of IPIV are set. */ +/* > */ +/* > If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ +/* > interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* > */ +/* > If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and */ +/* > columns k and -IPIV(k) were interchanged and rows and */ +/* > columns k-1 and -IPIV(k-1) were inerchaged, */ +/* > D(k-1:k,k-1:k) is a 2-by-2 diagonal block. */ +/* > */ +/* > If UPLO = 'L': */ +/* > Only the first KB elements of IPIV are set. */ +/* > */ +/* > If IPIV(k) > 0, then rows and columns k and IPIV(k) */ +/* > were interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* > */ +/* > If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and */ +/* > columns k and -IPIV(k) were interchanged and rows and */ +/* > columns k+1 and -IPIV(k+1) were inerchaged, */ +/* > D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of WORK. LWORK >= 1, and for best performance */ +/* > LWORK >= f2cmax(1,N*NB), where NB is the optimal blocksize for */ +/* > ZHETRF_ROOK. */ +/* > for LWORK < N, TRS will be done with Level BLAS 2 */ +/* > for LWORK >= N, TRS will be done with Level BLAS 3 */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, D(i,i) is exactly zero. The factorization */ +/* > has been completed, but the block diagonal matrix D is */ +/* > exactly singular, so the solution could not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2013 */ + +/* > \ingroup complex16HEsolve */ +/* > */ +/* > \verbatim */ +/* > */ +/* > November 2013, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > */ +/* > September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */ +/* > School of Mathematics, */ +/* > University of Manchester */ +/* > */ +/* > \endverbatim */ + + +/* ===================================================================== */ +/* Subroutine */ int zhesv_rook_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, + integer *ldb, doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int zhetrs_rook_(char *, integer *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + integer *); + extern logical lsame_(char *, char *); + integer nb; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int zhetrf_rook_(char *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + integer *); + + +/* -- LAPACK driver routine (version 3.5.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2013 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } else if (*lwork < 1 && ! lquery) { + *info = -10; + } + + if (*info == 0) { + if (*n == 0) { + lwkopt = 1; + } else { + nb = ilaenv_(&c__1, "ZHETRF_ROOK", uplo, n, &c_n1, &c_n1, &c_n1, ( + ftnlen)11, (ftnlen)1); + lwkopt = *n * nb; + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHESV_ROOK ", &i__1, (ftnlen)11); + return 0; + } else if (lquery) { + return 0; + } + +/* Compute the factorization A = U*D*U**H or A = L*D*L**H. */ + + zhetrf_rook_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + +/* Solve with TRS ( Use Level BLAS 2) */ + + zhetrs_rook_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset] + , ldb, info); + + } + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + return 0; + +/* End of ZHESV_ROOK */ + +} /* zhesv_rook__ */ + diff --git a/lapack-netlib/SRC/zhesvx.c b/lapack-netlib/SRC/zhesvx.c new file mode 100644 index 000000000..423bfb21e --- /dev/null +++ b/lapack-netlib/SRC/zhesvx.c @@ -0,0 +1,844 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHESVX computes the solution to system of linear equations A * X = B for HE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHESVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHESVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, */ +/* LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, */ +/* RWORK, INFO ) */ + +/* CHARACTER FACT, UPLO */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS */ +/* DOUBLE PRECISION RCOND */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) */ +/* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHESVX uses the diagonal pivoting factorization to compute the */ +/* > solution to a complex system of linear equations A * X = B, */ +/* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */ +/* > matrices. */ +/* > */ +/* > Error bounds on the solution and a condition estimate are also */ +/* > provided. */ +/* > \endverbatim */ + +/* > \par Description: */ +/* ================= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The following steps are performed: */ +/* > */ +/* > 1. If FACT = 'N', the diagonal pivoting method is used to factor A. */ +/* > The form of the factorization is */ +/* > A = U * D * U**H, if UPLO = 'U', or */ +/* > A = L * D * L**H, if UPLO = 'L', */ +/* > where U (or L) is a product of permutation and unit upper (lower) */ +/* > triangular matrices, and D is Hermitian and block diagonal with */ +/* > 1-by-1 and 2-by-2 diagonal blocks. */ +/* > */ +/* > 2. If some D(i,i)=0, so that D is exactly singular, then the routine */ +/* > returns with INFO = i. Otherwise, the factored form of A is used */ +/* > to estimate the condition number of the matrix A. If the */ +/* > reciprocal of the condition number is less than machine precision, */ +/* > INFO = N+1 is returned as a warning, but the routine still goes on */ +/* > to solve for X and compute error bounds as described below. */ +/* > */ +/* > 3. The system of equations is solved for X using the factored form */ +/* > of A. */ +/* > */ +/* > 4. Iterative refinement is applied to improve the computed solution */ +/* > matrix and calculate error bounds and backward error estimates */ +/* > for it. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] FACT */ +/* > \verbatim */ +/* > FACT is CHARACTER*1 */ +/* > Specifies whether or not the factored form of A has been */ +/* > supplied on entry. */ +/* > = 'F': On entry, AF and IPIV contain the factored form */ +/* > of A. A, AF and IPIV will not be modified. */ +/* > = 'N': The matrix A will be copied to AF and factored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > The Hermitian matrix A. If UPLO = 'U', the leading N-by-N */ +/* > upper triangular part of A contains the upper triangular part */ +/* > of the matrix A, and the strictly lower triangular part of A */ +/* > is not referenced. If UPLO = 'L', the leading N-by-N lower */ +/* > triangular part of A contains the lower triangular part of */ +/* > the matrix A, and the strictly upper triangular part of A is */ +/* > not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AF */ +/* > \verbatim */ +/* > AF is COMPLEX*16 array, dimension (LDAF,N) */ +/* > If FACT = 'F', then AF is an input argument and on entry */ +/* > contains the block diagonal matrix D and the multipliers used */ +/* > to obtain the factor U or L from the factorization */ +/* > A = U*D*U**H or A = L*D*L**H as computed by ZHETRF. */ +/* > */ +/* > If FACT = 'N', then AF is an output argument and on exit */ +/* > returns the block diagonal matrix D and the multipliers used */ +/* > to obtain the factor U or L from the factorization */ +/* > A = U*D*U**H or A = L*D*L**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > If FACT = 'F', then IPIV is an input argument and on entry */ +/* > contains details of the interchanges and the block structure */ +/* > of D, as determined by ZHETRF. */ +/* > If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ +/* > interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* > If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */ +/* > columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */ +/* > is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = */ +/* > IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */ +/* > interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ +/* > */ +/* > If FACT = 'N', then IPIV is an output argument and on exit */ +/* > contains details of the interchanges and the block structure */ +/* > of D, as determined by ZHETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > The N-by-NRHS right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension (LDX,NRHS) */ +/* > If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > The estimate of the reciprocal condition number of the matrix */ +/* > A. If RCOND is less than the machine precision (in */ +/* > particular, if RCOND = 0), the matrix is singular to working */ +/* > precision. This condition is indicated by a return code of */ +/* > INFO > 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of WORK. LWORK >= f2cmax(1,2*N), and for best */ +/* > performance, when FACT = 'N', LWORK >= f2cmax(1,2*N,N*NB), where */ +/* > NB is the optimal blocksize for ZHETRF. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is */ +/* > <= N: D(i,i) is exactly zero. The factorization */ +/* > has been completed but the factor D is exactly */ +/* > singular, so the solution and error bounds could */ +/* > not be computed. RCOND = 0 is returned. */ +/* > = N+1: D is nonsingular, but RCOND is less than machine */ +/* > precision, meaning that the matrix is singular */ +/* > to working precision. Nevertheless, the */ +/* > solution and error bounds are computed because */ +/* > there are a number of situations where the */ +/* > computed solution can be more accurate than the */ +/* > value of RCOND would suggest. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup complex16HEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int zhesvx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, + integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, + doublecomplex *work, integer *lwork, doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, i__1, i__2; + + /* Local variables */ + extern logical lsame_(char *, char *); + doublereal anorm; + integer nb; + extern doublereal dlamch_(char *); + logical nofact; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, + integer *, doublereal *); + extern /* Subroutine */ int zhecon_(char *, integer *, doublecomplex *, + integer *, integer *, doublereal *, doublereal *, doublecomplex *, + integer *), zherfs_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, doublereal *, doublecomplex *, doublereal *, + integer *), zhetrf_(char *, integer *, doublecomplex *, + integer *, integer *, doublecomplex *, integer *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *), zhetrs_(char *, + integer *, integer *, doublecomplex *, integer *, integer *, + doublecomplex *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --rwork; + + /* Function Body */ + *info = 0; + nofact = lsame_(fact, "N"); + lquery = *lwork == -1; + if (! nofact && ! lsame_(fact, "F")) { + *info = -1; + } else if (! lsame_(uplo, "U") && ! lsame_(uplo, + "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -8; + } else if (*ldb < f2cmax(1,*n)) { + *info = -11; + } else if (*ldx < f2cmax(1,*n)) { + *info = -13; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -18; + } + } + + if (*info == 0) { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + lwkopt = f2cmax(i__1,i__2); + if (nofact) { + nb = ilaenv_(&c__1, "ZHETRF", uplo, n, &c_n1, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n * nb; + lwkopt = f2cmax(i__1,i__2); + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHESVX", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + + if (nofact) { + +/* Compute the factorization A = U*D*U**H or A = L*D*L**H. */ + + zlacpy_(uplo, n, n, &a[a_offset], lda, &af[af_offset], ldaf); + zhetrf_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &work[1], lwork, + info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + *rcond = 0.; + return 0; + } + } + +/* Compute the norm of the matrix A. */ + + anorm = zlanhe_("I", uplo, n, &a[a_offset], lda, &rwork[1]); + +/* Compute the reciprocal of the condition number of A. */ + + zhecon_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &anorm, rcond, &work[1], + info); + +/* Compute the solution vectors X. */ + + zlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + zhetrs_(uplo, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, + info); + +/* Use iterative refinement to improve the computed solutions and */ +/* compute error bounds and backward error estimates for them. */ + + zherfs_(uplo, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &ipiv[1], + &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[1] + , &rwork[1], info); + +/* Set INFO = N+1 if the matrix is singular to working precision. */ + + if (*rcond < dlamch_("Epsilon")) { + *info = *n + 1; + } + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + return 0; + +/* End of ZHESVX */ + +} /* zhesvx_ */ + diff --git a/lapack-netlib/SRC/zhesvxx.c b/lapack-netlib/SRC/zhesvxx.c new file mode 100644 index 000000000..164294305 --- /dev/null +++ b/lapack-netlib/SRC/zhesvxx.c @@ -0,0 +1,1128 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief ZHESVXX computes the solution to system of linear equations A * X = B for HE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHESVXX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHESVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, */ +/* EQUED, S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, */ +/* N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, */ +/* NPARAMS, PARAMS, WORK, RWORK, INFO ) */ + +/* CHARACTER EQUED, FACT, UPLO */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, */ +/* $ N_ERR_BNDS */ +/* DOUBLE PRECISION RCOND, RPVGRW */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ WORK( * ), X( LDX, * ) */ +/* DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), RWORK( * ), */ +/* $ ERR_BNDS_NORM( NRHS, * ), */ +/* $ ERR_BNDS_COMP( NRHS, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHESVXX uses the diagonal pivoting factorization to compute the */ +/* > solution to a complex*16 system of linear equations A * X = B, where */ +/* > A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */ +/* > matrices. */ +/* > */ +/* > If requested, both normwise and maximum componentwise error bounds */ +/* > are returned. ZHESVXX will return a solution with a tiny */ +/* > guaranteed error (O(eps) where eps is the working machine */ +/* > precision) unless the matrix is very ill-conditioned, in which */ +/* > case a warning is returned. Relevant condition numbers also are */ +/* > calculated and returned. */ +/* > */ +/* > ZHESVXX accepts user-provided factorizations and equilibration */ +/* > factors; see the definitions of the FACT and EQUED options. */ +/* > Solving with refinement and using a factorization from a previous */ +/* > ZHESVXX call will also produce a solution with either O(eps) */ +/* > errors or warnings, but we cannot make that claim for general */ +/* > user-provided factorizations and equilibration factors if they */ +/* > differ from what ZHESVXX would itself produce. */ +/* > \endverbatim */ + +/* > \par Description: */ +/* ================= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The following steps are performed: */ +/* > */ +/* > 1. If FACT = 'E', double precision scaling factors are computed to equilibrate */ +/* > the system: */ +/* > */ +/* > diag(S)*A*diag(S) *inv(diag(S))*X = diag(S)*B */ +/* > */ +/* > Whether or not the system will be equilibrated depends on the */ +/* > scaling of the matrix A, but if equilibration is used, A is */ +/* > overwritten by diag(S)*A*diag(S) and B by diag(S)*B. */ +/* > */ +/* > 2. If FACT = 'N' or 'E', the LU decomposition is used to factor */ +/* > the matrix A (after equilibration if FACT = 'E') as */ +/* > */ +/* > A = U * D * U**T, if UPLO = 'U', or */ +/* > A = L * D * L**T, if UPLO = 'L', */ +/* > */ +/* > where U (or L) is a product of permutation and unit upper (lower) */ +/* > triangular matrices, and D is Hermitian and block diagonal with */ +/* > 1-by-1 and 2-by-2 diagonal blocks. */ +/* > */ +/* > 3. If some D(i,i)=0, so that D is exactly singular, then the */ +/* > routine returns with INFO = i. Otherwise, the factored form of A */ +/* > is used to estimate the condition number of the matrix A (see */ +/* > argument RCOND). If the reciprocal of the condition number is */ +/* > less than machine precision, the routine still goes on to solve */ +/* > for X and compute error bounds as described below. */ +/* > */ +/* > 4. The system of equations is solved for X using the factored form */ +/* > of A. */ +/* > */ +/* > 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */ +/* > the routine will use iterative refinement to try to get a small */ +/* > error and error bounds. Refinement calculates the residual to at */ +/* > least twice the working precision. */ +/* > */ +/* > 6. If equilibration was used, the matrix X is premultiplied by */ +/* > diag(R) so that it solves the original system before */ +/* > equilibration. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \verbatim */ +/* > Some optional parameters are bundled in the PARAMS array. These */ +/* > settings determine how refinement is performed, but often the */ +/* > defaults are acceptable. If the defaults are acceptable, users */ +/* > can pass NPARAMS = 0 which prevents the source code from accessing */ +/* > the PARAMS argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] FACT */ +/* > \verbatim */ +/* > FACT is CHARACTER*1 */ +/* > Specifies whether or not the factored form of the matrix A is */ +/* > supplied on entry, and if not, whether the matrix A should be */ +/* > equilibrated before it is factored. */ +/* > = 'F': On entry, AF and IPIV contain the factored form of A. */ +/* > If EQUED is not 'N', the matrix A has been */ +/* > equilibrated with scaling factors given by S. */ +/* > A, AF, and IPIV are not modified. */ +/* > = 'N': The matrix A will be copied to AF and factored. */ +/* > = 'E': The matrix A will be equilibrated if necessary, then */ +/* > copied to AF and factored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > The Hermitian matrix A. If UPLO = 'U', the leading N-by-N */ +/* > upper triangular part of A contains the upper triangular */ +/* > part of the matrix A, and the strictly lower triangular */ +/* > part of A is not referenced. If UPLO = 'L', the leading */ +/* > N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by */ +/* > diag(S)*A*diag(S). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AF */ +/* > \verbatim */ +/* > AF is COMPLEX*16 array, dimension (LDAF,N) */ +/* > If FACT = 'F', then AF is an input argument and on entry */ +/* > contains the block diagonal matrix D and the multipliers */ +/* > used to obtain the factor U or L from the factorization A = */ +/* > U*D*U**T or A = L*D*L**T as computed by DSYTRF. */ +/* > */ +/* > If FACT = 'N', then AF is an output argument and on exit */ +/* > returns the block diagonal matrix D and the multipliers */ +/* > used to obtain the factor U or L from the factorization A = */ +/* > U*D*U**T or A = L*D*L**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > If FACT = 'F', then IPIV is an input argument and on entry */ +/* > contains details of the interchanges and the block */ +/* > structure of D, as determined by ZHETRF. If IPIV(k) > 0, */ +/* > then rows and columns k and IPIV(k) were interchanged and */ +/* > D(k,k) is a 1-by-1 diagonal block. If UPLO = 'U' and */ +/* > IPIV(k) = IPIV(k-1) < 0, then rows and columns k-1 and */ +/* > -IPIV(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 */ +/* > diagonal block. If UPLO = 'L' and IPIV(k) = IPIV(k+1) < 0, */ +/* > then rows and columns k+1 and -IPIV(k) were interchanged */ +/* > and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ +/* > */ +/* > If FACT = 'N', then IPIV is an output argument and on exit */ +/* > contains details of the interchanges and the block */ +/* > structure of D, as determined by ZHETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] EQUED */ +/* > \verbatim */ +/* > EQUED is CHARACTER*1 */ +/* > Specifies the form of equilibration that was done. */ +/* > = 'N': No equilibration (always true if FACT = 'N'). */ +/* > = 'Y': Both row and column equilibration, i.e., A has been */ +/* > replaced by diag(S) * A * diag(S). */ +/* > EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* > output argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension (N) */ +/* > The scale factors for A. If EQUED = 'Y', A is multiplied on */ +/* > the left and right by diag(S). S is an input argument if FACT = */ +/* > 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED */ +/* > = 'Y', each element of S must be positive. If S is output, each */ +/* > element of S is a power of the radix. If S is input, each element */ +/* > of S should be a power of the radix to ensure a reliable solution */ +/* > and error estimates. Scaling by powers of the radix does not cause */ +/* > rounding errors unless the result underflows or overflows. */ +/* > Rounding errors during scaling lead to refining with a matrix that */ +/* > is not equivalent to the input matrix, producing error estimates */ +/* > that may not be reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, */ +/* > if EQUED = 'N', B is not modified; */ +/* > if EQUED = 'Y', B is overwritten by diag(S)*B; */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension (LDX,NRHS) */ +/* > If INFO = 0, the N-by-NRHS solution matrix X to the original */ +/* > system of equations. Note that A and B are modified on exit if */ +/* > EQUED .ne. 'N', and the solution to the equilibrated system is */ +/* > inv(diag(S))*X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > Reciprocal scaled condition number. This is an estimate of the */ +/* > reciprocal Skeel condition number of the matrix A after */ +/* > equilibration (if done). If this is less than the machine */ +/* > precision (in particular, if it is zero), the matrix is singular */ +/* > to working precision. Note that the error may still be small even */ +/* > if this number is very small and the matrix appears ill- */ +/* > conditioned. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RPVGRW */ +/* > \verbatim */ +/* > RPVGRW is DOUBLE PRECISION */ +/* > Reciprocal pivot growth. On exit, this contains the reciprocal */ +/* > pivot growth factor norm(A)/norm(U). The "f2cmax absolute element" */ +/* > norm is used. If this is much less than 1, then the stability of */ +/* > the LU factorization of the (equilibrated) matrix A could be poor. */ +/* > This also means that the solution X, estimated condition numbers, */ +/* > and error bounds could be unreliable. If factorization fails with */ +/* > 0 for the leading INFO columns of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is DOUBLE PRECISION array, dimension (NRHS) */ +/* > Componentwise relative backward error. This is the */ +/* > componentwise relative backward error of each solution vector X(j) */ +/* > (i.e., the smallest relative change in any element of A or B that */ +/* > makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N_ERR_BNDS */ +/* > \verbatim */ +/* > N_ERR_BNDS is INTEGER */ +/* > Number of error bounds to return for each right hand side */ +/* > and each type (normwise or componentwise). See ERR_BNDS_NORM and */ +/* > ERR_BNDS_COMP below. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_NORM */ +/* > \verbatim */ +/* > ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > normwise relative error, which is defined as follows: */ +/* > */ +/* > Normwise relative error in the ith solution vector: */ +/* > max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* > ------------------------------ */ +/* > max_j abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the type of error information as described */ +/* > below. There currently are up to three pieces of information */ +/* > returned. */ +/* > */ +/* > The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated normwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*A, where S scales each row by a power of the */ +/* > radix so all absolute row sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_COMP */ +/* > \verbatim */ +/* > ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > componentwise relative error, which is defined as follows: */ +/* > */ +/* > Componentwise relative error in the ith solution vector: */ +/* > abs(XTRUE(j,i) - X(j,i)) */ +/* > max_j ---------------------- */ +/* > abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the right-hand side i (on which the */ +/* > componentwise relative error depends), and the type of error */ +/* > information as described below. There currently are up to three */ +/* > pieces of information returned for each right-hand side. If */ +/* > componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* > ERR_BNDS_COMP is not accessed. If N_ERR_BNDS < 3, then at most */ +/* > the first (:,N_ERR_BNDS) entries are returned. */ +/* > */ +/* > The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * dlamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated componentwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * dlamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*(A*diag(x)), where x is the solution for the */ +/* > current right-hand side and S scales each row of */ +/* > A*diag(x) by a power of the radix so all absolute row */ +/* > sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NPARAMS */ +/* > \verbatim */ +/* > NPARAMS is INTEGER */ +/* > Specifies the number of parameters set in PARAMS. If <= 0, the */ +/* > PARAMS array is never referenced and default values are used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] PARAMS */ +/* > \verbatim */ +/* > PARAMS is DOUBLE PRECISION array, dimension NPARAMS */ +/* > Specifies algorithm parameters. If an entry is < 0.0, then */ +/* > that entry will be filled with default value used for that */ +/* > parameter. Only positions up to NPARAMS are accessed; defaults */ +/* > are used for higher-numbered parameters. */ +/* > */ +/* > PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ +/* > refinement or not. */ +/* > Default: 1.0D+0 */ +/* > = 0.0: No refinement is performed, and no error bounds are */ +/* > computed. */ +/* > = 1.0: Use the extra-precise refinement algorithm. */ +/* > (other values are reserved for future use) */ +/* > */ +/* > PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ +/* > computations allowed for refinement. */ +/* > Default: 10 */ +/* > Aggressive: Set to 100 to permit convergence using approximate */ +/* > factorizations or factorizations other than LU. If */ +/* > the factorization uses a technique other than */ +/* > Gaussian elimination, the guarantees in */ +/* > err_bnds_norm and err_bnds_comp may no longer be */ +/* > trustworthy. */ +/* > */ +/* > PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ +/* > will attempt to find a solution with small componentwise */ +/* > relative error in the double-precision algorithm. Positive */ +/* > is true, 0.0 is false. */ +/* > Default: 1.0 (attempt componentwise convergence) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (5*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: Successful exit. The solution to every right-hand side is */ +/* > guaranteed. */ +/* > < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly singular, so */ +/* > the solution and error bounds could not be computed. RCOND = 0 */ +/* > is returned. */ +/* > = N+J: The solution corresponding to the Jth right-hand side is */ +/* > not guaranteed. The solutions corresponding to other right- */ +/* > hand sides K with K > J may not be guaranteed as well, but */ +/* > only the first such right-hand side is reported. If a small */ +/* > componentwise error is not requested (PARAMS(3) = 0.0) then */ +/* > the Jth right-hand side is the first with a normwise error */ +/* > bound that is not guaranteed (the smallest J such */ +/* > that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ +/* > the Jth right-hand side is the first with either a normwise or */ +/* > componentwise error bound that is not guaranteed (the smallest */ +/* > J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ +/* > ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ +/* > ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ +/* > about all of the right-hand sides check ERR_BNDS_NORM or */ +/* > ERR_BNDS_COMP. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup complex16HEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int zhesvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * + ldaf, integer *ipiv, char *equed, doublereal *s, doublecomplex *b, + integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, + doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, + doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer * + nparams, doublereal *params, doublecomplex *work, doublereal *rwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + doublereal amax, smin, smax; + extern doublereal zla_herpvgrw_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *, + doublereal *); + integer j; + extern logical lsame_(char *, char *); + doublereal scond; + logical equil, rcequ; + extern doublereal dlamch_(char *); + logical nofact; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum; + extern /* Subroutine */ int zlaqhe_(char *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublereal *, char *); + integer infequ; + extern /* Subroutine */ int zhetrf_(char *, integer *, doublecomplex *, + integer *, integer *, doublecomplex *, integer *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *); + doublereal smlnum; + extern /* Subroutine */ int zhetrs_(char *, integer *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + integer *), zlascl2_(integer *, integer *, doublereal *, + doublecomplex *, integer *), zheequb_(char *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, + doublereal *, doublecomplex *, integer *), zherfsx_(char * + , char *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, doublereal *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, doublecomplex *, doublereal *, integer * + ); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ================================================================== */ + + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1; + err_bnds_norm__ -= err_bnds_norm_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + --s; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --berr; + --params; + --work; + --rwork; + + /* Function Body */ + *info = 0; + nofact = lsame_(fact, "N"); + equil = lsame_(fact, "E"); + smlnum = dlamch_("Safe minimum"); + bignum = 1. / smlnum; + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rcequ = FALSE_; + } else { + rcequ = lsame_(equed, "Y"); + } + +/* Default is failure. If an input parameter is wrong or */ +/* factorization fails, make everything look horrible. Only the */ +/* pivot growth is set here, the rest is initialized in ZHERFSX. */ + + *rpvgrw = 0.; + +/* Test the input parameters. PARAMS is not tested until ZHERFSX. */ + + if (! nofact && ! equil && ! lsame_(fact, "F")) { + *info = -1; + } else if (! lsame_(uplo, "U") && ! lsame_(uplo, + "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -8; + } else if (lsame_(fact, "F") && ! (rcequ || lsame_( + equed, "N"))) { + *info = -9; + } else { + if (rcequ) { + smin = bignum; + smax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + d__1 = smin, d__2 = s[j]; + smin = f2cmin(d__1,d__2); +/* Computing MAX */ + d__1 = smax, d__2 = s[j]; + smax = f2cmax(d__1,d__2); +/* L10: */ + } + if (smin <= 0.) { + *info = -10; + } else if (*n > 0) { + scond = f2cmax(smin,smlnum) / f2cmin(smax,bignum); + } else { + scond = 1.; + } + } + if (*info == 0) { + if (*ldb < f2cmax(1,*n)) { + *info = -12; + } else if (*ldx < f2cmax(1,*n)) { + *info = -14; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHESVXX", &i__1, (ftnlen)7); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + zheequb_(uplo, n, &a[a_offset], lda, &s[1], &scond, &amax, &work[1], & + infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + zlaqhe_(uplo, n, &a[a_offset], lda, &s[1], &scond, &amax, equed); + rcequ = lsame_(equed, "Y"); + } + } + +/* Scale the right-hand side. */ + + if (rcequ) { + zlascl2_(n, nrhs, &s[1], &b[b_offset], ldb); + } + + if (nofact || equil) { + +/* Compute the LDL^T or UDU^T factorization of A. */ + + zlacpy_(uplo, n, n, &a[a_offset], lda, &af[af_offset], ldaf); + i__1 = f2cmax(1,*n) * 5; + zhetrf_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &work[1], &i__1, + info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + +/* Pivot in column INFO is exactly 0 */ +/* Compute the reciprocal pivot growth factor of the */ +/* leading rank-deficient INFO columns of A. */ + + if (*n > 0) { + *rpvgrw = zla_herpvgrw_(uplo, n, info, &a[a_offset], lda, & + af[af_offset], ldaf, &ipiv[1], &rwork[1]); + } + return 0; + } + } + +/* Compute the reciprocal pivot growth factor RPVGRW. */ + + if (*n > 0) { + *rpvgrw = zla_herpvgrw_(uplo, n, info, &a[a_offset], lda, &af[ + af_offset], ldaf, &ipiv[1], &rwork[1]); + } + +/* Compute the solution matrix X. */ + + zlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + zhetrs_(uplo, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, + info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + zherfsx_(uplo, equed, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, & + ipiv[1], &s[1], &b[b_offset], ldb, &x[x_offset], ldx, rcond, & + berr[1], n_err_bnds__, &err_bnds_norm__[err_bnds_norm_offset], & + err_bnds_comp__[err_bnds_comp_offset], nparams, ¶ms[1], &work[ + 1], &rwork[1], info); + +/* Scale solutions. */ + + if (rcequ) { + zlascl2_(n, nrhs, &s[1], &x[x_offset], ldx); + } + + return 0; + +/* End of ZHESVXX */ + +} /* zhesvxx_ */ + diff --git a/lapack-netlib/SRC/zheswapr.c b/lapack-netlib/SRC/zheswapr.c new file mode 100644 index 000000000..ed7a55c93 --- /dev/null +++ b/lapack-netlib/SRC/zheswapr.c @@ -0,0 +1,630 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHESWAPR applies an elementary permutation on the rows and columns of a Hermitian matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHESWAPR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHESWAPR( UPLO, N, A, LDA, I1, I2) */ + +/* CHARACTER UPLO */ +/* INTEGER I1, I2, LDA, N */ +/* COMPLEX*16 A( LDA, N ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHESWAPR applies an elementary permutation on the rows and the columns of */ +/* > a hermitian matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the details of the factorization are stored */ +/* > as an upper or lower triangular matrix. */ +/* > = 'U': Upper triangular, form is A = U*D*U**T; */ +/* > = 'L': Lower triangular, form is A = L*D*L**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the NB diagonal matrix D and the multipliers */ +/* > used to obtain the factor U or L as computed by CSYTRF. */ +/* > */ +/* > On exit, if INFO = 0, the (symmetric) inverse of the original */ +/* > matrix. If UPLO = 'U', the upper triangular part of the */ +/* > inverse is formed and the part of A below the diagonal is not */ +/* > referenced; if UPLO = 'L' the lower triangular part of the */ +/* > inverse is formed and the part of A above the diagonal is */ +/* > not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] I1 */ +/* > \verbatim */ +/* > I1 is INTEGER */ +/* > Index of the first row to swap */ +/* > \endverbatim */ +/* > */ +/* > \param[in] I2 */ +/* > \verbatim */ +/* > I2 is INTEGER */ +/* > Index of the second row to swap */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16HEauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int zheswapr_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *i1, integer *i2) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + integer i__; + extern logical lsame_(char *, char *); + logical upper; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + doublecomplex tmp; + + +/* -- LAPACK auxiliary routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + + /* Function Body */ + upper = lsame_(uplo, "U"); + if (upper) { + +/* UPPER */ +/* first swap */ +/* - swap column I1 and I2 from I1 to I1-1 */ + i__1 = *i1 - 1; + zswap_(&i__1, &a[*i1 * a_dim1 + 1], &c__1, &a[*i2 * a_dim1 + 1], & + c__1); + +/* second swap : */ +/* - swap A(I1,I1) and A(I2,I2) */ +/* - swap row I1 from I1+1 to I2-1 with col I2 from I1+1 to I2-1 */ +/* - swap A(I2,I1) and A(I1,I2) */ + i__1 = *i1 + *i1 * a_dim1; + tmp.r = a[i__1].r, tmp.i = a[i__1].i; + i__1 = *i1 + *i1 * a_dim1; + i__2 = *i2 + *i2 * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = *i2 + *i2 * a_dim1; + a[i__1].r = tmp.r, a[i__1].i = tmp.i; + + i__1 = *i2 - *i1 - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *i1 + (*i1 + i__) * a_dim1; + tmp.r = a[i__2].r, tmp.i = a[i__2].i; + i__2 = *i1 + (*i1 + i__) * a_dim1; + d_cnjg(&z__1, &a[*i1 + i__ + *i2 * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = *i1 + i__ + *i2 * a_dim1; + d_cnjg(&z__1, &tmp); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + + i__1 = *i1 + *i2 * a_dim1; + d_cnjg(&z__1, &a[*i1 + *i2 * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + +/* third swap */ +/* - swap row I1 and I2 from I2+1 to N */ + i__1 = *n; + for (i__ = *i2 + 1; i__ <= i__1; ++i__) { + i__2 = *i1 + i__ * a_dim1; + tmp.r = a[i__2].r, tmp.i = a[i__2].i; + i__2 = *i1 + i__ * a_dim1; + i__3 = *i2 + i__ * a_dim1; + a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; + i__2 = *i2 + i__ * a_dim1; + a[i__2].r = tmp.r, a[i__2].i = tmp.i; + } + + } else { + +/* LOWER */ +/* first swap */ +/* - swap row I1 and I2 from 1 to I1-1 */ + i__1 = *i1 - 1; + zswap_(&i__1, &a[*i1 + a_dim1], lda, &a[*i2 + a_dim1], lda); + +/* second swap : */ +/* - swap A(I1,I1) and A(I2,I2) */ +/* - swap col I1 from I1+1 to I2-1 with row I2 from I1+1 to I2-1 */ +/* - swap A(I2,I1) and A(I1,I2) */ + i__1 = *i1 + *i1 * a_dim1; + tmp.r = a[i__1].r, tmp.i = a[i__1].i; + i__1 = *i1 + *i1 * a_dim1; + i__2 = *i2 + *i2 * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = *i2 + *i2 * a_dim1; + a[i__1].r = tmp.r, a[i__1].i = tmp.i; + + i__1 = *i2 - *i1 - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *i1 + i__ + *i1 * a_dim1; + tmp.r = a[i__2].r, tmp.i = a[i__2].i; + i__2 = *i1 + i__ + *i1 * a_dim1; + d_cnjg(&z__1, &a[*i2 + (*i1 + i__) * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = *i2 + (*i1 + i__) * a_dim1; + d_cnjg(&z__1, &tmp); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + + i__1 = *i2 + *i1 * a_dim1; + d_cnjg(&z__1, &a[*i2 + *i1 * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + +/* third swap */ +/* - swap col I1 and I2 from I2+1 to N */ + i__1 = *n; + for (i__ = *i2 + 1; i__ <= i__1; ++i__) { + i__2 = i__ + *i1 * a_dim1; + tmp.r = a[i__2].r, tmp.i = a[i__2].i; + i__2 = i__ + *i1 * a_dim1; + i__3 = i__ + *i2 * a_dim1; + a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; + i__2 = i__ + *i2 * a_dim1; + a[i__2].r = tmp.r, a[i__2].i = tmp.i; + } + + } + return 0; +} /* zheswapr_ */ + diff --git a/lapack-netlib/SRC/zhetd2.c b/lapack-netlib/SRC/zhetd2.c new file mode 100644 index 000000000..38774f35a --- /dev/null +++ b/lapack-netlib/SRC/zhetd2.c @@ -0,0 +1,796 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHETD2 reduces a Hermitian matrix to real symmetric tridiagonal form by an unitary similarity t +ransformation (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHETD2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHETD2( UPLO, N, A, LDA, D, E, TAU, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* DOUBLE PRECISION D( * ), E( * ) */ +/* COMPLEX*16 A( LDA, * ), TAU( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHETD2 reduces a complex Hermitian matrix A to real symmetric */ +/* > tridiagonal form T by a unitary similarity transformation: */ +/* > Q**H * A * Q = T. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > Hermitian matrix A is stored: */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > n-by-n upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading n-by-n lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > On exit, if UPLO = 'U', the diagonal and first superdiagonal */ +/* > of A are overwritten by the corresponding elements of the */ +/* > tridiagonal matrix T, and the elements above the first */ +/* > superdiagonal, with the array TAU, represent the unitary */ +/* > matrix Q as a product of elementary reflectors; if UPLO */ +/* > = 'L', the diagonal and first subdiagonal of A are over- */ +/* > written by the corresponding elements of the tridiagonal */ +/* > matrix T, and the elements below the first subdiagonal, with */ +/* > the array TAU, represent the unitary matrix Q as a product */ +/* > of elementary reflectors. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > The diagonal elements of the tridiagonal matrix T: */ +/* > D(i) = A(i,i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > The off-diagonal elements of the tridiagonal matrix T: */ +/* > E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (N-1) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16HEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > If UPLO = 'U', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(n-1) . . . H(2) H(1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */ +/* > A(1:i-1,i+1), and tau in TAU(i). */ +/* > */ +/* > If UPLO = 'L', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(n-1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */ +/* > and tau in TAU(i). */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples */ +/* > with n = 5: */ +/* > */ +/* > if UPLO = 'U': if UPLO = 'L': */ +/* > */ +/* > ( d e v2 v3 v4 ) ( d ) */ +/* > ( d e v3 v4 ) ( e d ) */ +/* > ( d e v4 ) ( v1 e d ) */ +/* > ( d e ) ( v1 v2 e d ) */ +/* > ( d ) ( v1 v2 v3 e d ) */ +/* > */ +/* > where d and e denote diagonal and off-diagonal elements of T, and vi */ +/* > denotes an element of the vector defining H(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zhetd2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; + + /* Local variables */ + doublecomplex taui; + extern /* Subroutine */ int zher2_(char *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + integer i__; + doublecomplex alpha; + extern logical lsame_(char *, char *); + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zhemv_(char *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *); + logical upper; + extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_( + char *, integer *, ftnlen), zlarfg_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --d__; + --e; + --tau; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHETD2", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + if (upper) { + +/* Reduce the upper triangle of A */ + + i__1 = *n + *n * a_dim1; + i__2 = *n + *n * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + for (i__ = *n - 1; i__ >= 1; --i__) { + +/* Generate elementary reflector H(i) = I - tau * v * v**H */ +/* to annihilate A(1:i-1,i+1) */ + + i__1 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__1].r, alpha.i = a[i__1].i; + zlarfg_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui); + i__1 = i__; + e[i__1] = alpha.r; + + if (taui.r != 0. || taui.i != 0.) { + +/* Apply H(i) from both sides to A(1:i,1:i) */ + + i__1 = i__ + (i__ + 1) * a_dim1; + a[i__1].r = 1., a[i__1].i = 0.; + +/* Compute x := tau * A * v storing x in TAU(1:i) */ + + zhemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b2, &tau[1], &c__1); + +/* Compute w := x - 1/2 * tau * (x**H * v) * v */ + + z__3.r = -.5, z__3.i = 0.; + z__2.r = z__3.r * taui.r - z__3.i * taui.i, z__2.i = z__3.r * + taui.i + z__3.i * taui.r; + zdotc_(&z__4, &i__, &tau[1], &c__1, &a[(i__ + 1) * a_dim1 + 1] + , &c__1); + z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * + z__4.i + z__2.i * z__4.r; + alpha.r = z__1.r, alpha.i = z__1.i; + zaxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + 1], &c__1); + +/* Apply the transformation as a rank-2 update: */ +/* A := A - v * w**H - w * v**H */ + + z__1.r = -1., z__1.i = 0.; + zher2_(uplo, &i__, &z__1, &a[(i__ + 1) * a_dim1 + 1], &c__1, & + tau[1], &c__1, &a[a_offset], lda); + + } else { + i__1 = i__ + i__ * a_dim1; + i__2 = i__ + i__ * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } + i__1 = i__ + (i__ + 1) * a_dim1; + i__2 = i__; + a[i__1].r = e[i__2], a[i__1].i = 0.; + i__1 = i__ + 1; + i__2 = i__ + 1 + (i__ + 1) * a_dim1; + d__[i__1] = a[i__2].r; + i__1 = i__; + tau[i__1].r = taui.r, tau[i__1].i = taui.i; +/* L10: */ + } + i__1 = a_dim1 + 1; + d__[1] = a[i__1].r; + } else { + +/* Reduce the lower triangle of A */ + + i__1 = a_dim1 + 1; + i__2 = a_dim1 + 1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) = I - tau * v * v**H */ +/* to annihilate A(i+2:n,i) */ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[f2cmin(i__3,*n) + i__ * a_dim1], &c__1, & + taui); + i__2 = i__; + e[i__2] = alpha.r; + + if (taui.r != 0. || taui.i != 0.) { + +/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ + + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute x := tau * A * v storing y in TAU(i:n-1) */ + + i__2 = *n - i__; + zhemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b2, &tau[ + i__], &c__1); + +/* Compute w := x - 1/2 * tau * (x**H * v) * v */ + + z__3.r = -.5, z__3.i = 0.; + z__2.r = z__3.r * taui.r - z__3.i * taui.i, z__2.i = z__3.r * + taui.i + z__3.i * taui.r; + i__2 = *n - i__; + zdotc_(&z__4, &i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ * + a_dim1], &c__1); + z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * + z__4.i + z__2.i * z__4.r; + alpha.r = z__1.r, alpha.i = z__1.i; + i__2 = *n - i__; + zaxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &c__1); + +/* Apply the transformation as a rank-2 update: */ +/* A := A - v * w**H - w * v**H */ + + i__2 = *n - i__; + z__1.r = -1., z__1.i = 0.; + zher2_(uplo, &i__2, &z__1, &a[i__ + 1 + i__ * a_dim1], &c__1, + &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda); + + } else { + i__2 = i__ + 1 + (i__ + 1) * a_dim1; + i__3 = i__ + 1 + (i__ + 1) * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + i__2 = i__ + 1 + i__ * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.; + i__2 = i__; + i__3 = i__ + i__ * a_dim1; + d__[i__2] = a[i__3].r; + i__2 = i__; + tau[i__2].r = taui.r, tau[i__2].i = taui.i; +/* L20: */ + } + i__1 = *n; + i__2 = *n + *n * a_dim1; + d__[i__1] = a[i__2].r; + } + + return 0; + +/* End of ZHETD2 */ + +} /* zhetd2_ */ + diff --git a/lapack-netlib/SRC/zhetf2.c b/lapack-netlib/SRC/zhetf2.c new file mode 100644 index 000000000..625909dc4 --- /dev/null +++ b/lapack-netlib/SRC/zhetf2.c @@ -0,0 +1,1262 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHETF2 computes the factorization of a complex Hermitian matrix, using the diagonal pivoting me +thod (unblocked algorithm, calling Level 2 BLAS). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHETF2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHETF2( UPLO, N, A, LDA, IPIV, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHETF2 computes the factorization of a complex Hermitian matrix A */ +/* > using the Bunch-Kaufman diagonal pivoting method: */ +/* > */ +/* > A = U*D*U**H or A = L*D*L**H */ +/* > */ +/* > where U (or L) is a product of permutation and unit upper (lower) */ +/* > triangular matrices, U**H is the conjugate transpose of U, and D is */ +/* > Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. */ +/* > */ +/* > This is the unblocked version of the algorithm, calling Level 2 BLAS. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > Hermitian matrix A is stored: */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > n-by-n upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading n-by-n lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, the block diagonal matrix D and the multipliers used */ +/* > to obtain the factor U or L (see below for further details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D. */ +/* > */ +/* > If UPLO = 'U': */ +/* > If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ +/* > interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* > */ +/* > If IPIV(k) = IPIV(k-1) < 0, then rows and columns */ +/* > k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */ +/* > is a 2-by-2 diagonal block. */ +/* > */ +/* > If UPLO = 'L': */ +/* > If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ +/* > interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* > */ +/* > If IPIV(k) = IPIV(k+1) < 0, then rows and columns */ +/* > k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) */ +/* > is a 2-by-2 diagonal block. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > > 0: if INFO = k, D(k,k) is exactly zero. The factorization */ +/* > has been completed, but the block diagonal matrix D is */ +/* > exactly singular, and division by zero will occur if it */ +/* > is used to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2013 */ + +/* > \ingroup complex16HEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > If UPLO = 'U', then A = U*D*U**H, where */ +/* > U = P(n)*U(n)* ... *P(k)U(k)* ..., */ +/* > i.e., U is a product of terms P(k)*U(k), where k decreases from n to */ +/* > 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ +/* > and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ +/* > defined by IPIV(k), and U(k) is a unit upper triangular matrix, such */ +/* > that if the diagonal block D(k) is of order s (s = 1 or 2), then */ +/* > */ +/* > ( I v 0 ) k-s */ +/* > U(k) = ( 0 I 0 ) s */ +/* > ( 0 0 I ) n-k */ +/* > k-s s n-k */ +/* > */ +/* > If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). */ +/* > If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), */ +/* > and A(k,k), and v overwrites A(1:k-2,k-1:k). */ +/* > */ +/* > If UPLO = 'L', then A = L*D*L**H, where */ +/* > L = P(1)*L(1)* ... *P(k)*L(k)* ..., */ +/* > i.e., L is a product of terms P(k)*L(k), where k increases from 1 to */ +/* > n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ +/* > and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ +/* > defined by IPIV(k), and L(k) is a unit lower triangular matrix, such */ +/* > that if the diagonal block D(k) is of order s (s = 1 or 2), then */ +/* > */ +/* > ( I 0 0 ) k-1 */ +/* > L(k) = ( 0 I 0 ) s */ +/* > ( 0 v I ) n-k-s+1 */ +/* > k-1 s n-k-s+1 */ +/* > */ +/* > If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). */ +/* > If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), */ +/* > and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). */ +/* > \endverbatim */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > 09-29-06 - patch from */ +/* > Bobby Cheng, MathWorks */ +/* > */ +/* > Replace l.210 and l.393 */ +/* > IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN */ +/* > by */ +/* > IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. DISNAN(ABSAKK) ) THEN */ +/* > */ +/* > 01-01-96 - Based on modifications by */ +/* > J. Lewis, Boeing Computer Services Company */ +/* > A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int zhetf2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6; + + /* Local variables */ + integer imax, jmax; + extern /* Subroutine */ int zher_(char *, integer *, doublereal *, + doublecomplex *, integer *, doublecomplex *, integer *); + doublereal d__; + integer i__, j, k; + doublecomplex t; + doublereal alpha; + extern logical lsame_(char *, char *); + integer kstep; + logical upper; + doublereal r1; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + extern doublereal dlapy2_(doublereal *, doublereal *); + doublereal d11; + doublecomplex d12; + doublereal d22; + doublecomplex d21; + integer kk, kp; + doublereal absakk; + doublecomplex wk; + doublereal tt; + extern logical disnan_(doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + doublereal colmax; + extern integer izamax_(integer *, doublecomplex *, integer *); + doublereal rowmax; + doublecomplex wkm1, wkp1; + + +/* -- LAPACK computational routine (version 3.5.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2013 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHETF2", &i__1, (ftnlen)6); + return 0; + } + +/* Initialize ALPHA for use in choosing pivot block size. */ + + alpha = (sqrt(17.) + 1.) / 8.; + + if (upper) { + +/* Factorize A as U*D*U**H using the upper triangle of A */ + +/* K is the main loop index, decreasing from N to 1 in steps of */ +/* 1 or 2 */ + + k = *n; +L10: + +/* If K < 1, exit from loop */ + + if (k < 1) { + goto L90; + } + kstep = 1; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value. */ +/* Determine both COLMAX and IMAX. */ + + if (k > 1) { + i__1 = k - 1; + imax = izamax_(&i__1, &a[k * a_dim1 + 1], &c__1); + i__1 = imax + k * a_dim1; + colmax = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(&a[imax + + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + + if (f2cmax(absakk,colmax) == 0. || disnan_(&absakk)) { + +/* Column K is zero or underflow, or contains a NaN: */ +/* set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + +/* ============================================================ */ + +/* Test for interchange */ + + if (absakk >= alpha * colmax) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else { + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value. */ +/* Determine only ROWMAX. */ + + i__1 = k - imax; + jmax = imax + izamax_(&i__1, &a[imax + (imax + 1) * a_dim1], + lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(&a[ + imax + jmax * a_dim1]), abs(d__2)); + if (imax > 1) { + i__1 = imax - 1; + jmax = izamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); +/* Computing MAX */ + i__1 = jmax + imax * a_dim1; + d__3 = rowmax, d__4 = (d__1 = a[i__1].r, abs(d__1)) + ( + d__2 = d_imag(&a[jmax + imax * a_dim1]), abs(d__2) + ); + rowmax = f2cmax(d__3,d__4); + } + + if (absakk >= alpha * colmax * (colmax / rowmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + + } else /* if(complicated condition) */ { + i__1 = imax + imax * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) >= alpha * rowmax) { + +/* interchange rows and columns K and IMAX, use 1-by-1 */ +/* pivot block */ + + kp = imax; + } else { + +/* interchange rows and columns K-1 and IMAX, use 2-by-2 */ +/* pivot block */ + + kp = imax; + kstep = 2; + } + } + + } + +/* ============================================================ */ + + kk = k - kstep + 1; + if (kp != kk) { + +/* Interchange rows and columns KK and KP in the leading */ +/* submatrix A(1:k,1:k) */ + + i__1 = kp - 1; + zswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], + &c__1); + i__1 = kk - 1; + for (j = kp + 1; j <= i__1; ++j) { + d_cnjg(&z__1, &a[j + kk * a_dim1]); + t.r = z__1.r, t.i = z__1.i; + i__2 = j + kk * a_dim1; + d_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L20: */ + } + i__1 = kp + kk * a_dim1; + d_cnjg(&z__1, &a[kp + kk * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k - 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k - 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } else { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (k - 1) * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } + } + +/* Update the leading submatrix */ + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k now holds */ + +/* W(k) = U(k)*D(k) */ + +/* where U(k) is the k-th column of U */ + +/* Perform a rank-1 update of A(1:k-1,1:k-1) as */ + +/* A := A - U(k)*D(k)*U(k)**H = A - W(k)*1/D(k)*W(k)**H */ + + i__1 = k + k * a_dim1; + r1 = 1. / a[i__1].r; + i__1 = k - 1; + d__1 = -r1; + zher_(uplo, &i__1, &d__1, &a[k * a_dim1 + 1], &c__1, &a[ + a_offset], lda); + +/* Store U(k) in column k */ + + i__1 = k - 1; + zdscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + +/* 2-by-2 pivot block D(k): columns k and k-1 now hold */ + +/* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) */ + +/* where U(k) and U(k-1) are the k-th and (k-1)-th columns */ +/* of U */ + +/* Perform a rank-2 update of A(1:k-2,1:k-2) as */ + +/* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**H */ +/* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )**H */ + + if (k > 2) { + + i__1 = k - 1 + k * a_dim1; + d__1 = a[i__1].r; + d__2 = d_imag(&a[k - 1 + k * a_dim1]); + d__ = dlapy2_(&d__1, &d__2); + i__1 = k - 1 + (k - 1) * a_dim1; + d22 = a[i__1].r / d__; + i__1 = k + k * a_dim1; + d11 = a[i__1].r / d__; + tt = 1. / (d11 * d22 - 1.); + i__1 = k - 1 + k * a_dim1; + z__1.r = a[i__1].r / d__, z__1.i = a[i__1].i / d__; + d12.r = z__1.r, d12.i = z__1.i; + d__ = tt / d__; + + for (j = k - 2; j >= 1; --j) { + i__1 = j + (k - 1) * a_dim1; + z__3.r = d11 * a[i__1].r, z__3.i = d11 * a[i__1].i; + d_cnjg(&z__5, &d12); + i__2 = j + k * a_dim1; + z__4.r = z__5.r * a[i__2].r - z__5.i * a[i__2].i, + z__4.i = z__5.r * a[i__2].i + z__5.i * a[i__2] + .r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = d__ * z__2.r, z__1.i = d__ * z__2.i; + wkm1.r = z__1.r, wkm1.i = z__1.i; + i__1 = j + k * a_dim1; + z__3.r = d22 * a[i__1].r, z__3.i = d22 * a[i__1].i; + i__2 = j + (k - 1) * a_dim1; + z__4.r = d12.r * a[i__2].r - d12.i * a[i__2].i, + z__4.i = d12.r * a[i__2].i + d12.i * a[i__2] + .r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = d__ * z__2.r, z__1.i = d__ * z__2.i; + wk.r = z__1.r, wk.i = z__1.i; + for (i__ = j; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__ + j * a_dim1; + i__3 = i__ + k * a_dim1; + d_cnjg(&z__4, &wk); + z__3.r = a[i__3].r * z__4.r - a[i__3].i * z__4.i, + z__3.i = a[i__3].r * z__4.i + a[i__3].i * + z__4.r; + z__2.r = a[i__2].r - z__3.r, z__2.i = a[i__2].i - + z__3.i; + i__4 = i__ + (k - 1) * a_dim1; + d_cnjg(&z__6, &wkm1); + z__5.r = a[i__4].r * z__6.r - a[i__4].i * z__6.i, + z__5.i = a[i__4].r * z__6.i + a[i__4].i * + z__6.r; + z__1.r = z__2.r - z__5.r, z__1.i = z__2.i - + z__5.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; +/* L30: */ + } + i__1 = j + k * a_dim1; + a[i__1].r = wk.r, a[i__1].i = wk.i; + i__1 = j + (k - 1) * a_dim1; + a[i__1].r = wkm1.r, a[i__1].i = wkm1.i; + i__1 = j + j * a_dim1; + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + z__1.r = d__1, z__1.i = 0.; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; +/* L40: */ + } + + } + + } + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + +/* Decrease K and return to the start of the main loop */ + + k -= kstep; + goto L10; + + } else { + +/* Factorize A as L*D*L**H using the lower triangle of A */ + +/* K is the main loop index, increasing from 1 to N in steps of */ +/* 1 or 2 */ + + k = 1; +L50: + +/* If K > N, exit from loop */ + + if (k > *n) { + goto L90; + } + kstep = 1; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value. */ +/* Determine both COLMAX and IMAX. */ + + if (k < *n) { + i__1 = *n - k; + imax = k + izamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); + i__1 = imax + k * a_dim1; + colmax = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(&a[imax + + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + + if (f2cmax(absakk,colmax) == 0. || disnan_(&absakk)) { + +/* Column K is zero or underflow, or contains a NaN: */ +/* set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + +/* ============================================================ */ + +/* Test for interchange */ + + if (absakk >= alpha * colmax) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else { + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value. */ +/* Determine only ROWMAX. */ + + i__1 = imax - k; + jmax = k - 1 + izamax_(&i__1, &a[imax + k * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(&a[ + imax + jmax * a_dim1]), abs(d__2)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + izamax_(&i__1, &a[imax + 1 + imax * a_dim1], + &c__1); +/* Computing MAX */ + i__1 = jmax + imax * a_dim1; + d__3 = rowmax, d__4 = (d__1 = a[i__1].r, abs(d__1)) + ( + d__2 = d_imag(&a[jmax + imax * a_dim1]), abs(d__2) + ); + rowmax = f2cmax(d__3,d__4); + } + + if (absakk >= alpha * colmax * (colmax / rowmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + + } else /* if(complicated condition) */ { + i__1 = imax + imax * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) >= alpha * rowmax) { + +/* interchange rows and columns K and IMAX, use 1-by-1 */ +/* pivot block */ + + kp = imax; + } else { + +/* interchange rows and columns K+1 and IMAX, use 2-by-2 */ +/* pivot block */ + + kp = imax; + kstep = 2; + } + } + + } + +/* ============================================================ */ + + kk = k + kstep - 1; + if (kp != kk) { + +/* Interchange rows and columns KK and KP in the trailing */ +/* submatrix A(k:n,k:n) */ + + if (kp < *n) { + i__1 = *n - kp; + zswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + + kp * a_dim1], &c__1); + } + i__1 = kp - 1; + for (j = kk + 1; j <= i__1; ++j) { + d_cnjg(&z__1, &a[j + kk * a_dim1]); + t.r = z__1.r, t.i = z__1.i; + i__2 = j + kk * a_dim1; + d_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L60: */ + } + i__1 = kp + kk * a_dim1; + d_cnjg(&z__1, &a[kp + kk * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k + 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k + 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } else { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } + } + +/* Update the trailing submatrix */ + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k now holds */ + +/* W(k) = L(k)*D(k) */ + +/* where L(k) is the k-th column of L */ + + if (k < *n) { + +/* Perform a rank-1 update of A(k+1:n,k+1:n) as */ + +/* A := A - L(k)*D(k)*L(k)**H = A - W(k)*(1/D(k))*W(k)**H */ + + i__1 = k + k * a_dim1; + r1 = 1. / a[i__1].r; + i__1 = *n - k; + d__1 = -r1; + zher_(uplo, &i__1, &d__1, &a[k + 1 + k * a_dim1], &c__1, & + a[k + 1 + (k + 1) * a_dim1], lda); + +/* Store L(k) in column K */ + + i__1 = *n - k; + zdscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + +/* 2-by-2 pivot block D(k) */ + + if (k < *n - 1) { + +/* Perform a rank-2 update of A(k+2:n,k+2:n) as */ + +/* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )**H */ +/* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )**H */ + +/* where L(k) and L(k+1) are the k-th and (k+1)-th */ +/* columns of L */ + + i__1 = k + 1 + k * a_dim1; + d__1 = a[i__1].r; + d__2 = d_imag(&a[k + 1 + k * a_dim1]); + d__ = dlapy2_(&d__1, &d__2); + i__1 = k + 1 + (k + 1) * a_dim1; + d11 = a[i__1].r / d__; + i__1 = k + k * a_dim1; + d22 = a[i__1].r / d__; + tt = 1. / (d11 * d22 - 1.); + i__1 = k + 1 + k * a_dim1; + z__1.r = a[i__1].r / d__, z__1.i = a[i__1].i / d__; + d21.r = z__1.r, d21.i = z__1.i; + d__ = tt / d__; + + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + z__3.r = d11 * a[i__2].r, z__3.i = d11 * a[i__2].i; + i__3 = j + (k + 1) * a_dim1; + z__4.r = d21.r * a[i__3].r - d21.i * a[i__3].i, + z__4.i = d21.r * a[i__3].i + d21.i * a[i__3] + .r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = d__ * z__2.r, z__1.i = d__ * z__2.i; + wk.r = z__1.r, wk.i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + z__3.r = d22 * a[i__2].r, z__3.i = d22 * a[i__2].i; + d_cnjg(&z__5, &d21); + i__3 = j + k * a_dim1; + z__4.r = z__5.r * a[i__3].r - z__5.i * a[i__3].i, + z__4.i = z__5.r * a[i__3].i + z__5.i * a[i__3] + .r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = d__ * z__2.r, z__1.i = d__ * z__2.i; + wkp1.r = z__1.r, wkp1.i = z__1.i; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__ + k * a_dim1; + d_cnjg(&z__4, &wk); + z__3.r = a[i__5].r * z__4.r - a[i__5].i * z__4.i, + z__3.i = a[i__5].r * z__4.i + a[i__5].i * + z__4.r; + z__2.r = a[i__4].r - z__3.r, z__2.i = a[i__4].i - + z__3.i; + i__6 = i__ + (k + 1) * a_dim1; + d_cnjg(&z__6, &wkp1); + z__5.r = a[i__6].r * z__6.r - a[i__6].i * z__6.i, + z__5.i = a[i__6].r * z__6.i + a[i__6].i * + z__6.r; + z__1.r = z__2.r - z__5.r, z__1.i = z__2.i - + z__5.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L70: */ + } + i__2 = j + k * a_dim1; + a[i__2].r = wk.r, a[i__2].i = wk.i; + i__2 = j + (k + 1) * a_dim1; + a[i__2].r = wkp1.r, a[i__2].i = wkp1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + z__1.r = d__1, z__1.i = 0.; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L80: */ + } + } + } + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + +/* Increase K and return to the start of the main loop */ + + k += kstep; + goto L50; + + } + +L90: + return 0; + +/* End of ZHETF2 */ + +} /* zhetf2_ */ + diff --git a/lapack-netlib/SRC/zhetf2_rk.c b/lapack-netlib/SRC/zhetf2_rk.c new file mode 100644 index 000000000..8a3dfa4fa --- /dev/null +++ b/lapack-netlib/SRC/zhetf2_rk.c @@ -0,0 +1,1723 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHETF2_RK computes the factorization of a complex Hermitian indefinite matrix using the bounded + Bunch-Kaufman (rook) diagonal pivoting method (BLAS2 unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHETF2_RK + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHETF2_RK( UPLO, N, A, LDA, E, IPIV, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ), E ( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > ZHETF2_RK computes the factorization of a complex Hermitian matrix A */ +/* > using the bounded Bunch-Kaufman (rook) diagonal pivoting method: */ +/* > */ +/* > A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), */ +/* > */ +/* > where U (or L) is unit upper (or lower) triangular matrix, */ +/* > U**H (or L**H) is the conjugate of U (or L), P is a permutation */ +/* > matrix, P**T is the transpose of P, and D is Hermitian and block */ +/* > diagonal with 1-by-1 and 2-by-2 diagonal blocks. */ +/* > */ +/* > This is the unblocked version of the algorithm, calling Level 2 BLAS. */ +/* > For more information see Further Details section. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > Hermitian matrix A is stored: */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. */ +/* > If UPLO = 'U': the leading N-by-N upper triangular part */ +/* > of A contains the upper triangular part of the matrix A, */ +/* > and the strictly lower triangular part of A is not */ +/* > referenced. */ +/* > */ +/* > If UPLO = 'L': the leading N-by-N lower triangular part */ +/* > of A contains the lower triangular part of the matrix A, */ +/* > and the strictly upper triangular part of A is not */ +/* > referenced. */ +/* > */ +/* > On exit, contains: */ +/* > a) ONLY diagonal elements of the Hermitian block diagonal */ +/* > matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); */ +/* > (superdiagonal (or subdiagonal) elements of D */ +/* > are stored on exit in array E), and */ +/* > b) If UPLO = 'U': factor U in the superdiagonal part of A. */ +/* > If UPLO = 'L': factor L in the subdiagonal part of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is COMPLEX*16 array, dimension (N) */ +/* > On exit, contains the superdiagonal (or subdiagonal) */ +/* > elements of the Hermitian block diagonal matrix D */ +/* > with 1-by-1 or 2-by-2 diagonal blocks, where */ +/* > If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; */ +/* > If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. */ +/* > */ +/* > NOTE: For 1-by-1 diagonal block D(k), where */ +/* > 1 <= k <= N, the element E(k) is set to 0 in both */ +/* > UPLO = 'U' or UPLO = 'L' cases. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > IPIV describes the permutation matrix P in the factorization */ +/* > of matrix A as follows. The absolute value of IPIV(k) */ +/* > represents the index of row and column that were */ +/* > interchanged with the k-th row and column. The value of UPLO */ +/* > describes the order in which the interchanges were applied. */ +/* > Also, the sign of IPIV represents the block structure of */ +/* > the Hermitian block diagonal matrix D with 1-by-1 or 2-by-2 */ +/* > diagonal blocks which correspond to 1 or 2 interchanges */ +/* > at each factorization step. For more info see Further */ +/* > Details section. */ +/* > */ +/* > If UPLO = 'U', */ +/* > ( in factorization order, k decreases from N to 1 ): */ +/* > a) A single positive entry IPIV(k) > 0 means: */ +/* > D(k,k) is a 1-by-1 diagonal block. */ +/* > If IPIV(k) != k, rows and columns k and IPIV(k) were */ +/* > interchanged in the matrix A(1:N,1:N); */ +/* > If IPIV(k) = k, no interchange occurred. */ +/* > */ +/* > b) A pair of consecutive negative entries */ +/* > IPIV(k) < 0 and IPIV(k-1) < 0 means: */ +/* > D(k-1:k,k-1:k) is a 2-by-2 diagonal block. */ +/* > (NOTE: negative entries in IPIV appear ONLY in pairs). */ +/* > 1) If -IPIV(k) != k, rows and columns */ +/* > k and -IPIV(k) were interchanged */ +/* > in the matrix A(1:N,1:N). */ +/* > If -IPIV(k) = k, no interchange occurred. */ +/* > 2) If -IPIV(k-1) != k-1, rows and columns */ +/* > k-1 and -IPIV(k-1) were interchanged */ +/* > in the matrix A(1:N,1:N). */ +/* > If -IPIV(k-1) = k-1, no interchange occurred. */ +/* > */ +/* > c) In both cases a) and b), always ABS( IPIV(k) ) <= k. */ +/* > */ +/* > d) NOTE: Any entry IPIV(k) is always NONZERO on output. */ +/* > */ +/* > If UPLO = 'L', */ +/* > ( in factorization order, k increases from 1 to N ): */ +/* > a) A single positive entry IPIV(k) > 0 means: */ +/* > D(k,k) is a 1-by-1 diagonal block. */ +/* > If IPIV(k) != k, rows and columns k and IPIV(k) were */ +/* > interchanged in the matrix A(1:N,1:N). */ +/* > If IPIV(k) = k, no interchange occurred. */ +/* > */ +/* > b) A pair of consecutive negative entries */ +/* > IPIV(k) < 0 and IPIV(k+1) < 0 means: */ +/* > D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ +/* > (NOTE: negative entries in IPIV appear ONLY in pairs). */ +/* > 1) If -IPIV(k) != k, rows and columns */ +/* > k and -IPIV(k) were interchanged */ +/* > in the matrix A(1:N,1:N). */ +/* > If -IPIV(k) = k, no interchange occurred. */ +/* > 2) If -IPIV(k+1) != k+1, rows and columns */ +/* > k-1 and -IPIV(k-1) were interchanged */ +/* > in the matrix A(1:N,1:N). */ +/* > If -IPIV(k+1) = k+1, no interchange occurred. */ +/* > */ +/* > c) In both cases a) and b), always ABS( IPIV(k) ) >= k. */ +/* > */ +/* > d) NOTE: Any entry IPIV(k) is always NONZERO on output. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > */ +/* > < 0: If INFO = -k, the k-th argument had an illegal value */ +/* > */ +/* > > 0: If INFO = k, the matrix A is singular, because: */ +/* > If UPLO = 'U': column k in the upper */ +/* > triangular part of A contains all zeros. */ +/* > If UPLO = 'L': column k in the lower */ +/* > triangular part of A contains all zeros. */ +/* > */ +/* > Therefore D(k,k) is exactly zero, and superdiagonal */ +/* > elements of column k of U (or subdiagonal elements of */ +/* > column k of L ) are all zeros. The factorization has */ +/* > been completed, but the block diagonal matrix D is */ +/* > exactly singular, and division by zero will occur if */ +/* > it is used to solve a system of equations. */ +/* > */ +/* > NOTE: INFO only stores the first occurrence of */ +/* > a singularity, any subsequent occurrence of singularity */ +/* > is not stored in INFO even though the factorization */ +/* > always completes. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16HEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > TODO: put further details */ +/* > \endverbatim */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > December 2016, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > */ +/* > September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */ +/* > School of Mathematics, */ +/* > University of Manchester */ +/* > */ +/* > 01-01-96 - Based on modifications by */ +/* > J. Lewis, Boeing Computer Services Company */ +/* > A. Petitet, Computer Science Dept., */ +/* > Univ. of Tenn., Knoxville abd , USA */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int zhetf2_rk_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublecomplex *e, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1, d__2; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6, z__7, z__8; + + /* Local variables */ + logical done; + integer imax, jmax; + extern /* Subroutine */ int zher_(char *, integer *, doublereal *, + doublecomplex *, integer *, doublecomplex *, integer *); + doublereal d__; + integer i__, j, k, p; + doublecomplex t; + doublereal alpha; + extern logical lsame_(char *, char *); + doublereal dtemp, sfmin; + integer itemp, kstep; + logical upper; + doublereal r1; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + extern doublereal dlapy2_(doublereal *, doublereal *); + doublereal d11; + doublecomplex d12; + doublereal d22; + doublecomplex d21; + integer ii, kk; + extern doublereal dlamch_(char *); + integer kp; + doublereal absakk; + doublecomplex wk; + doublereal tt; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + doublereal colmax; + extern integer izamax_(integer *, doublecomplex *, integer *); + doublereal rowmax; + doublecomplex wkm1, wkp1; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ====================================================================== */ + + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --e; + --ipiv; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHETF2_RK", &i__1, (ftnlen)9); + return 0; + } + +/* Initialize ALPHA for use in choosing pivot block size. */ + + alpha = (sqrt(17.) + 1.) / 8.; + +/* Compute machine safe minimum */ + + sfmin = dlamch_("S"); + + if (upper) { + +/* Factorize A as U*D*U**H using the upper triangle of A */ + +/* Initialize the first entry of array E, where superdiagonal */ +/* elements of D are stored */ + + e[1].r = 0., e[1].i = 0.; + +/* K is the main loop index, decreasing from N to 1 in steps of */ +/* 1 or 2 */ + + k = *n; +L10: + +/* If K < 1, exit from loop */ + + if (k < 1) { + goto L34; + } + kstep = 1; + p = k; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value. */ +/* Determine both COLMAX and IMAX. */ + + if (k > 1) { + i__1 = k - 1; + imax = izamax_(&i__1, &a[k * a_dim1 + 1], &c__1); + i__1 = imax + k * a_dim1; + colmax = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(&a[imax + + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + + if (f2cmax(absakk,colmax) == 0.) { + +/* Column K is zero or underflow: set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + +/* Set E( K ) to zero */ + + if (k > 1) { + i__1 = k; + e[i__1].r = 0., e[i__1].i = 0.; + } + + } else { + +/* ============================================================ */ + +/* BEGIN pivot search */ + +/* Case(1) */ +/* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX */ +/* (used to handle NaN and Inf) */ + + if (! (absakk < alpha * colmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + + } else { + + done = FALSE_; + +/* Loop until pivot found */ + +L12: + +/* BEGIN pivot search loop body */ + + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value. */ +/* Determine both ROWMAX and JMAX. */ + + if (imax != k) { + i__1 = k - imax; + jmax = imax + izamax_(&i__1, &a[imax + (imax + 1) * + a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(& + a[imax + jmax * a_dim1]), abs(d__2)); + } else { + rowmax = 0.; + } + + if (imax > 1) { + i__1 = imax - 1; + itemp = izamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); + i__1 = itemp + imax * a_dim1; + dtemp = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(&a[ + itemp + imax * a_dim1]), abs(d__2)); + if (dtemp > rowmax) { + rowmax = dtemp; + jmax = itemp; + } + } + +/* Case(2) */ +/* Equivalent to testing for */ +/* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX */ +/* (used to handle NaN and Inf) */ + + i__1 = imax + imax * a_dim1; + if (! ((d__1 = a[i__1].r, abs(d__1)) < alpha * rowmax)) { + +/* interchange rows and columns K and IMAX, */ +/* use 1-by-1 pivot block */ + + kp = imax; + done = TRUE_; + +/* Case(3) */ +/* Equivalent to testing for ROWMAX.EQ.COLMAX, */ +/* (used to handle NaN and Inf) */ + + } else if (p == jmax || rowmax <= colmax) { + +/* interchange rows and columns K-1 and IMAX, */ +/* use 2-by-2 pivot block */ + + kp = imax; + kstep = 2; + done = TRUE_; + +/* Case(4) */ + } else { + +/* Pivot not found: set params and repeat */ + + p = imax; + colmax = rowmax; + imax = jmax; + } + +/* END pivot search loop body */ + + if (! done) { + goto L12; + } + + } + +/* END pivot search */ + +/* ============================================================ */ + +/* KK is the column of A where pivoting step stopped */ + + kk = k - kstep + 1; + +/* For only a 2x2 pivot, interchange rows and columns K and P */ +/* in the leading submatrix A(1:k,1:k) */ + + if (kstep == 2 && p != k) { +/* (1) Swap columnar parts */ + if (p > 1) { + i__1 = p - 1; + zswap_(&i__1, &a[k * a_dim1 + 1], &c__1, &a[p * a_dim1 + + 1], &c__1); + } +/* (2) Swap and conjugate middle parts */ + i__1 = k - 1; + for (j = p + 1; j <= i__1; ++j) { + d_cnjg(&z__1, &a[j + k * a_dim1]); + t.r = z__1.r, t.i = z__1.i; + i__2 = j + k * a_dim1; + d_cnjg(&z__1, &a[p + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = p + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L14: */ + } +/* (3) Swap and conjugate corner elements at row-col interserction */ + i__1 = p + k * a_dim1; + d_cnjg(&z__1, &a[p + k * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; +/* (4) Swap diagonal elements at row-col intersection */ + i__1 = k + k * a_dim1; + r1 = a[i__1].r; + i__1 = k + k * a_dim1; + i__2 = p + p * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = p + p * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.; + +/* Convert upper triangle of A into U form by applying */ +/* the interchanges in columns k+1:N. */ + + if (k < *n) { + i__1 = *n - k; + zswap_(&i__1, &a[k + (k + 1) * a_dim1], lda, &a[p + (k + + 1) * a_dim1], lda); + } + + } + +/* For both 1x1 and 2x2 pivots, interchange rows and */ +/* columns KK and KP in the leading submatrix A(1:k,1:k) */ + + if (kp != kk) { +/* (1) Swap columnar parts */ + if (kp > 1) { + i__1 = kp - 1; + zswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + + 1], &c__1); + } +/* (2) Swap and conjugate middle parts */ + i__1 = kk - 1; + for (j = kp + 1; j <= i__1; ++j) { + d_cnjg(&z__1, &a[j + kk * a_dim1]); + t.r = z__1.r, t.i = z__1.i; + i__2 = j + kk * a_dim1; + d_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L15: */ + } +/* (3) Swap and conjugate corner elements at row-col interserction */ + i__1 = kp + kk * a_dim1; + d_cnjg(&z__1, &a[kp + kk * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; +/* (4) Swap diagonal elements at row-col intersection */ + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.; + + if (kstep == 2) { +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; +/* (5) Swap row elements */ + i__1 = k - 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k - 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + +/* Convert upper triangle of A into U form by applying */ +/* the interchanges in columns k+1:N. */ + + if (k < *n) { + i__1 = *n - k; + zswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k + + 1) * a_dim1], lda); + } + + } else { +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (k - 1) * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } + } + +/* Update the leading submatrix */ + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k now holds */ + +/* W(k) = U(k)*D(k) */ + +/* where U(k) is the k-th column of U */ + + if (k > 1) { + +/* Perform a rank-1 update of A(1:k-1,1:k-1) and */ +/* store U(k) in column k */ + + i__1 = k + k * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) >= sfmin) { + +/* Perform a rank-1 update of A(1:k-1,1:k-1) as */ +/* A := A - U(k)*D(k)*U(k)**T */ +/* = A - W(k)*1/D(k)*W(k)**T */ + + i__1 = k + k * a_dim1; + d11 = 1. / a[i__1].r; + i__1 = k - 1; + d__1 = -d11; + zher_(uplo, &i__1, &d__1, &a[k * a_dim1 + 1], &c__1, & + a[a_offset], lda); + +/* Store U(k) in column k */ + + i__1 = k - 1; + zdscal_(&i__1, &d11, &a[k * a_dim1 + 1], &c__1); + } else { + +/* Store L(k) in column K */ + + i__1 = k + k * a_dim1; + d11 = a[i__1].r; + i__1 = k - 1; + for (ii = 1; ii <= i__1; ++ii) { + i__2 = ii + k * a_dim1; + i__3 = ii + k * a_dim1; + z__1.r = a[i__3].r / d11, z__1.i = a[i__3].i / + d11; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L16: */ + } + +/* Perform a rank-1 update of A(k+1:n,k+1:n) as */ +/* A := A - U(k)*D(k)*U(k)**T */ +/* = A - W(k)*(1/D(k))*W(k)**T */ +/* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T */ + + i__1 = k - 1; + d__1 = -d11; + zher_(uplo, &i__1, &d__1, &a[k * a_dim1 + 1], &c__1, & + a[a_offset], lda); + } + +/* Store the superdiagonal element of D in array E */ + + i__1 = k; + e[i__1].r = 0., e[i__1].i = 0.; + + } + + } else { + +/* 2-by-2 pivot block D(k): columns k and k-1 now hold */ + +/* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) */ + +/* where U(k) and U(k-1) are the k-th and (k-1)-th columns */ +/* of U */ + +/* Perform a rank-2 update of A(1:k-2,1:k-2) as */ + +/* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T */ +/* = A - ( ( A(k-1)A(k) )*inv(D(k)) ) * ( A(k-1)A(k) )**T */ + +/* and store L(k) and L(k+1) in columns k and k+1 */ + + if (k > 2) { +/* D = |A12| */ + i__1 = k - 1 + k * a_dim1; + d__1 = a[i__1].r; + d__2 = d_imag(&a[k - 1 + k * a_dim1]); + d__ = dlapy2_(&d__1, &d__2); + i__1 = k + k * a_dim1; + z__1.r = a[i__1].r / d__, z__1.i = a[i__1].i / d__; + d11 = z__1.r; + i__1 = k - 1 + (k - 1) * a_dim1; + z__1.r = a[i__1].r / d__, z__1.i = a[i__1].i / d__; + d22 = z__1.r; + i__1 = k - 1 + k * a_dim1; + z__1.r = a[i__1].r / d__, z__1.i = a[i__1].i / d__; + d12.r = z__1.r, d12.i = z__1.i; + tt = 1. / (d11 * d22 - 1.); + + for (j = k - 2; j >= 1; --j) { + +/* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J */ + + i__1 = j + (k - 1) * a_dim1; + z__3.r = d11 * a[i__1].r, z__3.i = d11 * a[i__1].i; + d_cnjg(&z__5, &d12); + i__2 = j + k * a_dim1; + z__4.r = z__5.r * a[i__2].r - z__5.i * a[i__2].i, + z__4.i = z__5.r * a[i__2].i + z__5.i * a[i__2] + .r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = tt * z__2.r, z__1.i = tt * z__2.i; + wkm1.r = z__1.r, wkm1.i = z__1.i; + i__1 = j + k * a_dim1; + z__3.r = d22 * a[i__1].r, z__3.i = d22 * a[i__1].i; + i__2 = j + (k - 1) * a_dim1; + z__4.r = d12.r * a[i__2].r - d12.i * a[i__2].i, + z__4.i = d12.r * a[i__2].i + d12.i * a[i__2] + .r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = tt * z__2.r, z__1.i = tt * z__2.i; + wk.r = z__1.r, wk.i = z__1.i; + +/* Perform a rank-2 update of A(1:k-2,1:k-2) */ + + for (i__ = j; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__ + j * a_dim1; + i__3 = i__ + k * a_dim1; + z__4.r = a[i__3].r / d__, z__4.i = a[i__3].i / + d__; + d_cnjg(&z__5, &wk); + z__3.r = z__4.r * z__5.r - z__4.i * z__5.i, + z__3.i = z__4.r * z__5.i + z__4.i * + z__5.r; + z__2.r = a[i__2].r - z__3.r, z__2.i = a[i__2].i - + z__3.i; + i__4 = i__ + (k - 1) * a_dim1; + z__7.r = a[i__4].r / d__, z__7.i = a[i__4].i / + d__; + d_cnjg(&z__8, &wkm1); + z__6.r = z__7.r * z__8.r - z__7.i * z__8.i, + z__6.i = z__7.r * z__8.i + z__7.i * + z__8.r; + z__1.r = z__2.r - z__6.r, z__1.i = z__2.i - + z__6.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; +/* L20: */ + } + +/* Store U(k) and U(k-1) in cols k and k-1 for row J */ + + i__1 = j + k * a_dim1; + z__1.r = wk.r / d__, z__1.i = wk.i / d__; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = j + (k - 1) * a_dim1; + z__1.r = wkm1.r / d__, z__1.i = wkm1.i / d__; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = j + j * a_dim1; + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + z__1.r = d__1, z__1.i = 0.; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + +/* L30: */ + } + + } + +/* Copy superdiagonal elements of D(K) to E(K) and */ +/* ZERO out superdiagonal entry of A */ + + i__1 = k; + i__2 = k - 1 + k * a_dim1; + e[i__1].r = a[i__2].r, e[i__1].i = a[i__2].i; + i__1 = k - 1; + e[i__1].r = 0., e[i__1].i = 0.; + i__1 = k - 1 + k * a_dim1; + a[i__1].r = 0., a[i__1].i = 0.; + + } + +/* End column K is nonsingular */ + + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -p; + ipiv[k - 1] = -kp; + } + +/* Decrease K and return to the start of the main loop */ + + k -= kstep; + goto L10; + +L34: + + ; + } else { + +/* Factorize A as L*D*L**H using the lower triangle of A */ + +/* Initialize the unused last entry of the subdiagonal array E. */ + + i__1 = *n; + e[i__1].r = 0., e[i__1].i = 0.; + +/* K is the main loop index, increasing from 1 to N in steps of */ +/* 1 or 2 */ + + k = 1; +L40: + +/* If K > N, exit from loop */ + + if (k > *n) { + goto L64; + } + kstep = 1; + p = k; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value. */ +/* Determine both COLMAX and IMAX. */ + + if (k < *n) { + i__1 = *n - k; + imax = k + izamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); + i__1 = imax + k * a_dim1; + colmax = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(&a[imax + + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + + if (f2cmax(absakk,colmax) == 0.) { + +/* Column K is zero or underflow: set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + +/* Set E( K ) to zero */ + + if (k < *n) { + i__1 = k; + e[i__1].r = 0., e[i__1].i = 0.; + } + + } else { + +/* ============================================================ */ + +/* BEGIN pivot search */ + +/* Case(1) */ +/* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX */ +/* (used to handle NaN and Inf) */ + + if (! (absakk < alpha * colmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + + } else { + + done = FALSE_; + +/* Loop until pivot found */ + +L42: + +/* BEGIN pivot search loop body */ + + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value. */ +/* Determine both ROWMAX and JMAX. */ + + if (imax != k) { + i__1 = imax - k; + jmax = k - 1 + izamax_(&i__1, &a[imax + k * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(& + a[imax + jmax * a_dim1]), abs(d__2)); + } else { + rowmax = 0.; + } + + if (imax < *n) { + i__1 = *n - imax; + itemp = imax + izamax_(&i__1, &a[imax + 1 + imax * a_dim1] + , &c__1); + i__1 = itemp + imax * a_dim1; + dtemp = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(&a[ + itemp + imax * a_dim1]), abs(d__2)); + if (dtemp > rowmax) { + rowmax = dtemp; + jmax = itemp; + } + } + +/* Case(2) */ +/* Equivalent to testing for */ +/* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX */ +/* (used to handle NaN and Inf) */ + + i__1 = imax + imax * a_dim1; + if (! ((d__1 = a[i__1].r, abs(d__1)) < alpha * rowmax)) { + +/* interchange rows and columns K and IMAX, */ +/* use 1-by-1 pivot block */ + + kp = imax; + done = TRUE_; + +/* Case(3) */ +/* Equivalent to testing for ROWMAX.EQ.COLMAX, */ +/* (used to handle NaN and Inf) */ + + } else if (p == jmax || rowmax <= colmax) { + +/* interchange rows and columns K+1 and IMAX, */ +/* use 2-by-2 pivot block */ + + kp = imax; + kstep = 2; + done = TRUE_; + +/* Case(4) */ + } else { + +/* Pivot not found: set params and repeat */ + + p = imax; + colmax = rowmax; + imax = jmax; + } + + +/* END pivot search loop body */ + + if (! done) { + goto L42; + } + + } + +/* END pivot search */ + +/* ============================================================ */ + +/* KK is the column of A where pivoting step stopped */ + + kk = k + kstep - 1; + +/* For only a 2x2 pivot, interchange rows and columns K and P */ +/* in the trailing submatrix A(k:n,k:n) */ + + if (kstep == 2 && p != k) { +/* (1) Swap columnar parts */ + if (p < *n) { + i__1 = *n - p; + zswap_(&i__1, &a[p + 1 + k * a_dim1], &c__1, &a[p + 1 + p + * a_dim1], &c__1); + } +/* (2) Swap and conjugate middle parts */ + i__1 = p - 1; + for (j = k + 1; j <= i__1; ++j) { + d_cnjg(&z__1, &a[j + k * a_dim1]); + t.r = z__1.r, t.i = z__1.i; + i__2 = j + k * a_dim1; + d_cnjg(&z__1, &a[p + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = p + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L44: */ + } +/* (3) Swap and conjugate corner elements at row-col interserction */ + i__1 = p + k * a_dim1; + d_cnjg(&z__1, &a[p + k * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; +/* (4) Swap diagonal elements at row-col intersection */ + i__1 = k + k * a_dim1; + r1 = a[i__1].r; + i__1 = k + k * a_dim1; + i__2 = p + p * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = p + p * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.; + +/* Convert lower triangle of A into L form by applying */ +/* the interchanges in columns 1:k-1. */ + + if (k > 1) { + i__1 = k - 1; + zswap_(&i__1, &a[k + a_dim1], lda, &a[p + a_dim1], lda); + } + + } + +/* For both 1x1 and 2x2 pivots, interchange rows and */ +/* columns KK and KP in the trailing submatrix A(k:n,k:n) */ + + if (kp != kk) { +/* (1) Swap columnar parts */ + if (kp < *n) { + i__1 = *n - kp; + zswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + + kp * a_dim1], &c__1); + } +/* (2) Swap and conjugate middle parts */ + i__1 = kp - 1; + for (j = kk + 1; j <= i__1; ++j) { + d_cnjg(&z__1, &a[j + kk * a_dim1]); + t.r = z__1.r, t.i = z__1.i; + i__2 = j + kk * a_dim1; + d_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L45: */ + } +/* (3) Swap and conjugate corner elements at row-col interserction */ + i__1 = kp + kk * a_dim1; + d_cnjg(&z__1, &a[kp + kk * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; +/* (4) Swap diagonal elements at row-col intersection */ + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.; + + if (kstep == 2) { +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; +/* (5) Swap row elements */ + i__1 = k + 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k + 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + +/* Convert lower triangle of A into L form by applying */ +/* the interchanges in columns 1:k-1. */ + + if (k > 1) { + i__1 = k - 1; + zswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda); + } + + } else { +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } + } + +/* Update the trailing submatrix */ + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k of A now holds */ + +/* W(k) = L(k)*D(k), */ + +/* where L(k) is the k-th column of L */ + + if (k < *n) { + +/* Perform a rank-1 update of A(k+1:n,k+1:n) and */ +/* store L(k) in column k */ + +/* Handle division by a small number */ + + i__1 = k + k * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) >= sfmin) { + +/* Perform a rank-1 update of A(k+1:n,k+1:n) as */ +/* A := A - L(k)*D(k)*L(k)**T */ +/* = A - W(k)*(1/D(k))*W(k)**T */ + + i__1 = k + k * a_dim1; + d11 = 1. / a[i__1].r; + i__1 = *n - k; + d__1 = -d11; + zher_(uplo, &i__1, &d__1, &a[k + 1 + k * a_dim1], & + c__1, &a[k + 1 + (k + 1) * a_dim1], lda); + +/* Store L(k) in column k */ + + i__1 = *n - k; + zdscal_(&i__1, &d11, &a[k + 1 + k * a_dim1], &c__1); + } else { + +/* Store L(k) in column k */ + + i__1 = k + k * a_dim1; + d11 = a[i__1].r; + i__1 = *n; + for (ii = k + 1; ii <= i__1; ++ii) { + i__2 = ii + k * a_dim1; + i__3 = ii + k * a_dim1; + z__1.r = a[i__3].r / d11, z__1.i = a[i__3].i / + d11; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L46: */ + } + +/* Perform a rank-1 update of A(k+1:n,k+1:n) as */ +/* A := A - L(k)*D(k)*L(k)**T */ +/* = A - W(k)*(1/D(k))*W(k)**T */ +/* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T */ + + i__1 = *n - k; + d__1 = -d11; + zher_(uplo, &i__1, &d__1, &a[k + 1 + k * a_dim1], & + c__1, &a[k + 1 + (k + 1) * a_dim1], lda); + } + +/* Store the subdiagonal element of D in array E */ + + i__1 = k; + e[i__1].r = 0., e[i__1].i = 0.; + + } + + } else { + +/* 2-by-2 pivot block D(k): columns k and k+1 now hold */ + +/* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) */ + +/* where L(k) and L(k+1) are the k-th and (k+1)-th columns */ +/* of L */ + + +/* Perform a rank-2 update of A(k+2:n,k+2:n) as */ + +/* A := A - ( L(k) L(k+1) ) * D(k) * ( L(k) L(k+1) )**T */ +/* = A - ( ( A(k)A(k+1) )*inv(D(k) ) * ( A(k)A(k+1) )**T */ + +/* and store L(k) and L(k+1) in columns k and k+1 */ + + if (k < *n - 1) { +/* D = |A21| */ + i__1 = k + 1 + k * a_dim1; + d__1 = a[i__1].r; + d__2 = d_imag(&a[k + 1 + k * a_dim1]); + d__ = dlapy2_(&d__1, &d__2); + i__1 = k + 1 + (k + 1) * a_dim1; + d11 = a[i__1].r / d__; + i__1 = k + k * a_dim1; + d22 = a[i__1].r / d__; + i__1 = k + 1 + k * a_dim1; + z__1.r = a[i__1].r / d__, z__1.i = a[i__1].i / d__; + d21.r = z__1.r, d21.i = z__1.i; + tt = 1. / (d11 * d22 - 1.); + + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + +/* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J */ + + i__2 = j + k * a_dim1; + z__3.r = d11 * a[i__2].r, z__3.i = d11 * a[i__2].i; + i__3 = j + (k + 1) * a_dim1; + z__4.r = d21.r * a[i__3].r - d21.i * a[i__3].i, + z__4.i = d21.r * a[i__3].i + d21.i * a[i__3] + .r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = tt * z__2.r, z__1.i = tt * z__2.i; + wk.r = z__1.r, wk.i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + z__3.r = d22 * a[i__2].r, z__3.i = d22 * a[i__2].i; + d_cnjg(&z__5, &d21); + i__3 = j + k * a_dim1; + z__4.r = z__5.r * a[i__3].r - z__5.i * a[i__3].i, + z__4.i = z__5.r * a[i__3].i + z__5.i * a[i__3] + .r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = tt * z__2.r, z__1.i = tt * z__2.i; + wkp1.r = z__1.r, wkp1.i = z__1.i; + +/* Perform a rank-2 update of A(k+2:n,k+2:n) */ + + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__ + k * a_dim1; + z__4.r = a[i__5].r / d__, z__4.i = a[i__5].i / + d__; + d_cnjg(&z__5, &wk); + z__3.r = z__4.r * z__5.r - z__4.i * z__5.i, + z__3.i = z__4.r * z__5.i + z__4.i * + z__5.r; + z__2.r = a[i__4].r - z__3.r, z__2.i = a[i__4].i - + z__3.i; + i__6 = i__ + (k + 1) * a_dim1; + z__7.r = a[i__6].r / d__, z__7.i = a[i__6].i / + d__; + d_cnjg(&z__8, &wkp1); + z__6.r = z__7.r * z__8.r - z__7.i * z__8.i, + z__6.i = z__7.r * z__8.i + z__7.i * + z__8.r; + z__1.r = z__2.r - z__6.r, z__1.i = z__2.i - + z__6.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L50: */ + } + +/* Store L(k) and L(k+1) in cols k and k+1 for row J */ + + i__2 = j + k * a_dim1; + z__1.r = wk.r / d__, z__1.i = wk.i / d__; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + z__1.r = wkp1.r / d__, z__1.i = wkp1.i / d__; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* (*) Make sure that diagonal element of pivot is real */ + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + z__1.r = d__1, z__1.i = 0.; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + +/* L60: */ + } + + } + +/* Copy subdiagonal elements of D(K) to E(K) and */ +/* ZERO out subdiagonal entry of A */ + + i__1 = k; + i__2 = k + 1 + k * a_dim1; + e[i__1].r = a[i__2].r, e[i__1].i = a[i__2].i; + i__1 = k + 1; + e[i__1].r = 0., e[i__1].i = 0.; + i__1 = k + 1 + k * a_dim1; + a[i__1].r = 0., a[i__1].i = 0.; + + } + +/* End column K is nonsingular */ + + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -p; + ipiv[k + 1] = -kp; + } + +/* Increase K and return to the start of the main loop */ + + k += kstep; + goto L40; + +L64: + + ; + } + + return 0; + +/* End of ZHETF2_RK */ + +} /* zhetf2_rk__ */ + diff --git a/lapack-netlib/SRC/zhetf2_rook.c b/lapack-netlib/SRC/zhetf2_rook.c new file mode 100644 index 000000000..7dac9fc2f --- /dev/null +++ b/lapack-netlib/SRC/zhetf2_rook.c @@ -0,0 +1,1569 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHETF2_ROOK computes the factorization of a complex Hermitian indefinite matrix using the bound +ed Bunch-Kaufman ("rook") diagonal pivoting method (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHETF2_ROOK + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHETF2_ROOK( UPLO, N, A, LDA, IPIV, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* INTEGER IPIV( * ) */ +/* COMPLEX*16 A( LDA, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHETF2_ROOK computes the factorization of a complex Hermitian matrix A */ +/* > using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: */ +/* > */ +/* > A = U*D*U**H or A = L*D*L**H */ +/* > */ +/* > where U (or L) is a product of permutation and unit upper (lower) */ +/* > triangular matrices, U**H is the conjugate transpose of U, and D is */ +/* > Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. */ +/* > */ +/* > This is the unblocked version of the algorithm, calling Level 2 BLAS. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > Hermitian matrix A is stored: */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > n-by-n upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading n-by-n lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, the block diagonal matrix D and the multipliers used */ +/* > to obtain the factor U or L (see below for further details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D. */ +/* > */ +/* > If UPLO = 'U': */ +/* > If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ +/* > interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* > */ +/* > If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and */ +/* > columns k and -IPIV(k) were interchanged and rows and */ +/* > columns k-1 and -IPIV(k-1) were inerchaged, */ +/* > D(k-1:k,k-1:k) is a 2-by-2 diagonal block. */ +/* > */ +/* > If UPLO = 'L': */ +/* > If IPIV(k) > 0, then rows and columns k and IPIV(k) */ +/* > were interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* > */ +/* > If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and */ +/* > columns k and -IPIV(k) were interchanged and rows and */ +/* > columns k+1 and -IPIV(k+1) were inerchaged, */ +/* > D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > > 0: if INFO = k, D(k,k) is exactly zero. The factorization */ +/* > has been completed, but the block diagonal matrix D is */ +/* > exactly singular, and division by zero will occur if it */ +/* > is used to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2013 */ + +/* > \ingroup complex16HEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > If UPLO = 'U', then A = U*D*U**H, where */ +/* > U = P(n)*U(n)* ... *P(k)U(k)* ..., */ +/* > i.e., U is a product of terms P(k)*U(k), where k decreases from n to */ +/* > 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ +/* > and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ +/* > defined by IPIV(k), and U(k) is a unit upper triangular matrix, such */ +/* > that if the diagonal block D(k) is of order s (s = 1 or 2), then */ +/* > */ +/* > ( I v 0 ) k-s */ +/* > U(k) = ( 0 I 0 ) s */ +/* > ( 0 0 I ) n-k */ +/* > k-s s n-k */ +/* > */ +/* > If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). */ +/* > If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), */ +/* > and A(k,k), and v overwrites A(1:k-2,k-1:k). */ +/* > */ +/* > If UPLO = 'L', then A = L*D*L**H, where */ +/* > L = P(1)*L(1)* ... *P(k)*L(k)* ..., */ +/* > i.e., L is a product of terms P(k)*L(k), where k increases from 1 to */ +/* > n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ +/* > and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ +/* > defined by IPIV(k), and L(k) is a unit lower triangular matrix, such */ +/* > that if the diagonal block D(k) is of order s (s = 1 or 2), then */ +/* > */ +/* > ( I 0 0 ) k-1 */ +/* > L(k) = ( 0 I 0 ) s */ +/* > ( 0 v I ) n-k-s+1 */ +/* > k-1 s n-k-s+1 */ +/* > */ +/* > If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). */ +/* > If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), */ +/* > and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). */ +/* > \endverbatim */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > November 2013, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > */ +/* > September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */ +/* > School of Mathematics, */ +/* > University of Manchester */ +/* > */ +/* > 01-01-96 - Based on modifications by */ +/* > J. Lewis, Boeing Computer Services Company */ +/* > A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int zhetf2_rook_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1, d__2; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6, z__7, z__8; + + /* Local variables */ + logical done; + integer imax, jmax; + extern /* Subroutine */ int zher_(char *, integer *, doublereal *, + doublecomplex *, integer *, doublecomplex *, integer *); + doublereal d__; + integer i__, j, k, p; + doublecomplex t; + doublereal alpha; + extern logical lsame_(char *, char *); + doublereal dtemp, sfmin; + integer itemp, kstep; + logical upper; + doublereal r1; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + extern doublereal dlapy2_(doublereal *, doublereal *); + doublereal d11; + doublecomplex d12; + doublereal d22; + doublecomplex d21; + integer ii, kk; + extern doublereal dlamch_(char *); + integer kp; + doublereal absakk; + doublecomplex wk; + doublereal tt; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + doublereal colmax; + extern integer izamax_(integer *, doublecomplex *, integer *); + doublereal rowmax; + doublecomplex wkm1, wkp1; + + +/* -- LAPACK computational routine (version 3.5.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2013 */ + + +/* ====================================================================== */ + + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHETF2_ROOK", &i__1, (ftnlen)11); + return 0; + } + +/* Initialize ALPHA for use in choosing pivot block size. */ + + alpha = (sqrt(17.) + 1.) / 8.; + +/* Compute machine safe minimum */ + + sfmin = dlamch_("S"); + + if (upper) { + +/* Factorize A as U*D*U**H using the upper triangle of A */ + +/* K is the main loop index, decreasing from N to 1 in steps of */ +/* 1 or 2 */ + + k = *n; +L10: + +/* If K < 1, exit from loop */ + + if (k < 1) { + goto L70; + } + kstep = 1; + p = k; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value. */ +/* Determine both COLMAX and IMAX. */ + + if (k > 1) { + i__1 = k - 1; + imax = izamax_(&i__1, &a[k * a_dim1 + 1], &c__1); + i__1 = imax + k * a_dim1; + colmax = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(&a[imax + + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + + if (f2cmax(absakk,colmax) == 0.) { + +/* Column K is zero or underflow: set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + +/* ============================================================ */ + +/* BEGIN pivot search */ + +/* Case(1) */ +/* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX */ +/* (used to handle NaN and Inf) */ + + if (! (absakk < alpha * colmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + + } else { + + done = FALSE_; + +/* Loop until pivot found */ + +L12: + +/* BEGIN pivot search loop body */ + + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value. */ +/* Determine both ROWMAX and JMAX. */ + + if (imax != k) { + i__1 = k - imax; + jmax = imax + izamax_(&i__1, &a[imax + (imax + 1) * + a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(& + a[imax + jmax * a_dim1]), abs(d__2)); + } else { + rowmax = 0.; + } + + if (imax > 1) { + i__1 = imax - 1; + itemp = izamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); + i__1 = itemp + imax * a_dim1; + dtemp = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(&a[ + itemp + imax * a_dim1]), abs(d__2)); + if (dtemp > rowmax) { + rowmax = dtemp; + jmax = itemp; + } + } + +/* Case(2) */ +/* Equivalent to testing for */ +/* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX */ +/* (used to handle NaN and Inf) */ + + i__1 = imax + imax * a_dim1; + if (! ((d__1 = a[i__1].r, abs(d__1)) < alpha * rowmax)) { + +/* interchange rows and columns K and IMAX, */ +/* use 1-by-1 pivot block */ + + kp = imax; + done = TRUE_; + +/* Case(3) */ +/* Equivalent to testing for ROWMAX.EQ.COLMAX, */ +/* (used to handle NaN and Inf) */ + + } else if (p == jmax || rowmax <= colmax) { + +/* interchange rows and columns K-1 and IMAX, */ +/* use 2-by-2 pivot block */ + + kp = imax; + kstep = 2; + done = TRUE_; + +/* Case(4) */ + } else { + +/* Pivot not found: set params and repeat */ + + p = imax; + colmax = rowmax; + imax = jmax; + } + +/* END pivot search loop body */ + + if (! done) { + goto L12; + } + + } + +/* END pivot search */ + +/* ============================================================ */ + +/* KK is the column of A where pivoting step stopped */ + + kk = k - kstep + 1; + +/* For only a 2x2 pivot, interchange rows and columns K and P */ +/* in the leading submatrix A(1:k,1:k) */ + + if (kstep == 2 && p != k) { +/* (1) Swap columnar parts */ + if (p > 1) { + i__1 = p - 1; + zswap_(&i__1, &a[k * a_dim1 + 1], &c__1, &a[p * a_dim1 + + 1], &c__1); + } +/* (2) Swap and conjugate middle parts */ + i__1 = k - 1; + for (j = p + 1; j <= i__1; ++j) { + d_cnjg(&z__1, &a[j + k * a_dim1]); + t.r = z__1.r, t.i = z__1.i; + i__2 = j + k * a_dim1; + d_cnjg(&z__1, &a[p + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = p + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L14: */ + } +/* (3) Swap and conjugate corner elements at row-col interserction */ + i__1 = p + k * a_dim1; + d_cnjg(&z__1, &a[p + k * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; +/* (4) Swap diagonal elements at row-col intersection */ + i__1 = k + k * a_dim1; + r1 = a[i__1].r; + i__1 = k + k * a_dim1; + i__2 = p + p * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = p + p * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.; + } + +/* For both 1x1 and 2x2 pivots, interchange rows and */ +/* columns KK and KP in the leading submatrix A(1:k,1:k) */ + + if (kp != kk) { +/* (1) Swap columnar parts */ + if (kp > 1) { + i__1 = kp - 1; + zswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + + 1], &c__1); + } +/* (2) Swap and conjugate middle parts */ + i__1 = kk - 1; + for (j = kp + 1; j <= i__1; ++j) { + d_cnjg(&z__1, &a[j + kk * a_dim1]); + t.r = z__1.r, t.i = z__1.i; + i__2 = j + kk * a_dim1; + d_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L15: */ + } +/* (3) Swap and conjugate corner elements at row-col interserction */ + i__1 = kp + kk * a_dim1; + d_cnjg(&z__1, &a[kp + kk * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; +/* (4) Swap diagonal elements at row-col intersection */ + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.; + + if (kstep == 2) { +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; +/* (5) Swap row elements */ + i__1 = k - 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k - 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } else { +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (k - 1) * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } + } + +/* Update the leading submatrix */ + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k now holds */ + +/* W(k) = U(k)*D(k) */ + +/* where U(k) is the k-th column of U */ + + if (k > 1) { + +/* Perform a rank-1 update of A(1:k-1,1:k-1) and */ +/* store U(k) in column k */ + + i__1 = k + k * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) >= sfmin) { + +/* Perform a rank-1 update of A(1:k-1,1:k-1) as */ +/* A := A - U(k)*D(k)*U(k)**T */ +/* = A - W(k)*1/D(k)*W(k)**T */ + + i__1 = k + k * a_dim1; + d11 = 1. / a[i__1].r; + i__1 = k - 1; + d__1 = -d11; + zher_(uplo, &i__1, &d__1, &a[k * a_dim1 + 1], &c__1, & + a[a_offset], lda); + +/* Store U(k) in column k */ + + i__1 = k - 1; + zdscal_(&i__1, &d11, &a[k * a_dim1 + 1], &c__1); + } else { + +/* Store L(k) in column K */ + + i__1 = k + k * a_dim1; + d11 = a[i__1].r; + i__1 = k - 1; + for (ii = 1; ii <= i__1; ++ii) { + i__2 = ii + k * a_dim1; + i__3 = ii + k * a_dim1; + z__1.r = a[i__3].r / d11, z__1.i = a[i__3].i / + d11; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L16: */ + } + +/* Perform a rank-1 update of A(k+1:n,k+1:n) as */ +/* A := A - U(k)*D(k)*U(k)**T */ +/* = A - W(k)*(1/D(k))*W(k)**T */ +/* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T */ + + i__1 = k - 1; + d__1 = -d11; + zher_(uplo, &i__1, &d__1, &a[k * a_dim1 + 1], &c__1, & + a[a_offset], lda); + } + } + + } else { + +/* 2-by-2 pivot block D(k): columns k and k-1 now hold */ + +/* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) */ + +/* where U(k) and U(k-1) are the k-th and (k-1)-th columns */ +/* of U */ + +/* Perform a rank-2 update of A(1:k-2,1:k-2) as */ + +/* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T */ +/* = A - ( ( A(k-1)A(k) )*inv(D(k)) ) * ( A(k-1)A(k) )**T */ + +/* and store L(k) and L(k+1) in columns k and k+1 */ + + if (k > 2) { +/* D = |A12| */ + i__1 = k - 1 + k * a_dim1; + d__1 = a[i__1].r; + d__2 = d_imag(&a[k - 1 + k * a_dim1]); + d__ = dlapy2_(&d__1, &d__2); + i__1 = k + k * a_dim1; + z__1.r = a[i__1].r / d__, z__1.i = a[i__1].i / d__; + d11 = z__1.r; + i__1 = k - 1 + (k - 1) * a_dim1; + z__1.r = a[i__1].r / d__, z__1.i = a[i__1].i / d__; + d22 = z__1.r; + i__1 = k - 1 + k * a_dim1; + z__1.r = a[i__1].r / d__, z__1.i = a[i__1].i / d__; + d12.r = z__1.r, d12.i = z__1.i; + tt = 1. / (d11 * d22 - 1.); + + for (j = k - 2; j >= 1; --j) { + +/* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J */ + + i__1 = j + (k - 1) * a_dim1; + z__3.r = d11 * a[i__1].r, z__3.i = d11 * a[i__1].i; + d_cnjg(&z__5, &d12); + i__2 = j + k * a_dim1; + z__4.r = z__5.r * a[i__2].r - z__5.i * a[i__2].i, + z__4.i = z__5.r * a[i__2].i + z__5.i * a[i__2] + .r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = tt * z__2.r, z__1.i = tt * z__2.i; + wkm1.r = z__1.r, wkm1.i = z__1.i; + i__1 = j + k * a_dim1; + z__3.r = d22 * a[i__1].r, z__3.i = d22 * a[i__1].i; + i__2 = j + (k - 1) * a_dim1; + z__4.r = d12.r * a[i__2].r - d12.i * a[i__2].i, + z__4.i = d12.r * a[i__2].i + d12.i * a[i__2] + .r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = tt * z__2.r, z__1.i = tt * z__2.i; + wk.r = z__1.r, wk.i = z__1.i; + +/* Perform a rank-2 update of A(1:k-2,1:k-2) */ + + for (i__ = j; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__ + j * a_dim1; + i__3 = i__ + k * a_dim1; + z__4.r = a[i__3].r / d__, z__4.i = a[i__3].i / + d__; + d_cnjg(&z__5, &wk); + z__3.r = z__4.r * z__5.r - z__4.i * z__5.i, + z__3.i = z__4.r * z__5.i + z__4.i * + z__5.r; + z__2.r = a[i__2].r - z__3.r, z__2.i = a[i__2].i - + z__3.i; + i__4 = i__ + (k - 1) * a_dim1; + z__7.r = a[i__4].r / d__, z__7.i = a[i__4].i / + d__; + d_cnjg(&z__8, &wkm1); + z__6.r = z__7.r * z__8.r - z__7.i * z__8.i, + z__6.i = z__7.r * z__8.i + z__7.i * + z__8.r; + z__1.r = z__2.r - z__6.r, z__1.i = z__2.i - + z__6.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; +/* L20: */ + } + +/* Store U(k) and U(k-1) in cols k and k-1 for row J */ + + i__1 = j + k * a_dim1; + z__1.r = wk.r / d__, z__1.i = wk.i / d__; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = j + (k - 1) * a_dim1; + z__1.r = wkm1.r / d__, z__1.i = wkm1.i / d__; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = j + j * a_dim1; + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + z__1.r = d__1, z__1.i = 0.; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + +/* L30: */ + } + + } + + } + + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -p; + ipiv[k - 1] = -kp; + } + +/* Decrease K and return to the start of the main loop */ + + k -= kstep; + goto L10; + + } else { + +/* Factorize A as L*D*L**H using the lower triangle of A */ + +/* K is the main loop index, increasing from 1 to N in steps of */ +/* 1 or 2 */ + + k = 1; +L40: + +/* If K > N, exit from loop */ + + if (k > *n) { + goto L70; + } + kstep = 1; + p = k; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value. */ +/* Determine both COLMAX and IMAX. */ + + if (k < *n) { + i__1 = *n - k; + imax = k + izamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); + i__1 = imax + k * a_dim1; + colmax = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(&a[imax + + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + + if (f2cmax(absakk,colmax) == 0.) { + +/* Column K is zero or underflow: set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + +/* ============================================================ */ + +/* BEGIN pivot search */ + +/* Case(1) */ +/* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX */ +/* (used to handle NaN and Inf) */ + + if (! (absakk < alpha * colmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + + } else { + + done = FALSE_; + +/* Loop until pivot found */ + +L42: + +/* BEGIN pivot search loop body */ + + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value. */ +/* Determine both ROWMAX and JMAX. */ + + if (imax != k) { + i__1 = imax - k; + jmax = k - 1 + izamax_(&i__1, &a[imax + k * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(& + a[imax + jmax * a_dim1]), abs(d__2)); + } else { + rowmax = 0.; + } + + if (imax < *n) { + i__1 = *n - imax; + itemp = imax + izamax_(&i__1, &a[imax + 1 + imax * a_dim1] + , &c__1); + i__1 = itemp + imax * a_dim1; + dtemp = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_imag(&a[ + itemp + imax * a_dim1]), abs(d__2)); + if (dtemp > rowmax) { + rowmax = dtemp; + jmax = itemp; + } + } + +/* Case(2) */ +/* Equivalent to testing for */ +/* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX */ +/* (used to handle NaN and Inf) */ + + i__1 = imax + imax * a_dim1; + if (! ((d__1 = a[i__1].r, abs(d__1)) < alpha * rowmax)) { + +/* interchange rows and columns K and IMAX, */ +/* use 1-by-1 pivot block */ + + kp = imax; + done = TRUE_; + +/* Case(3) */ +/* Equivalent to testing for ROWMAX.EQ.COLMAX, */ +/* (used to handle NaN and Inf) */ + + } else if (p == jmax || rowmax <= colmax) { + +/* interchange rows and columns K+1 and IMAX, */ +/* use 2-by-2 pivot block */ + + kp = imax; + kstep = 2; + done = TRUE_; + +/* Case(4) */ + } else { + +/* Pivot not found: set params and repeat */ + + p = imax; + colmax = rowmax; + imax = jmax; + } + + +/* END pivot search loop body */ + + if (! done) { + goto L42; + } + + } + +/* END pivot search */ + +/* ============================================================ */ + +/* KK is the column of A where pivoting step stopped */ + + kk = k + kstep - 1; + +/* For only a 2x2 pivot, interchange rows and columns K and P */ +/* in the trailing submatrix A(k:n,k:n) */ + + if (kstep == 2 && p != k) { +/* (1) Swap columnar parts */ + if (p < *n) { + i__1 = *n - p; + zswap_(&i__1, &a[p + 1 + k * a_dim1], &c__1, &a[p + 1 + p + * a_dim1], &c__1); + } +/* (2) Swap and conjugate middle parts */ + i__1 = p - 1; + for (j = k + 1; j <= i__1; ++j) { + d_cnjg(&z__1, &a[j + k * a_dim1]); + t.r = z__1.r, t.i = z__1.i; + i__2 = j + k * a_dim1; + d_cnjg(&z__1, &a[p + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = p + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L44: */ + } +/* (3) Swap and conjugate corner elements at row-col interserction */ + i__1 = p + k * a_dim1; + d_cnjg(&z__1, &a[p + k * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; +/* (4) Swap diagonal elements at row-col intersection */ + i__1 = k + k * a_dim1; + r1 = a[i__1].r; + i__1 = k + k * a_dim1; + i__2 = p + p * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = p + p * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.; + } + +/* For both 1x1 and 2x2 pivots, interchange rows and */ +/* columns KK and KP in the trailing submatrix A(k:n,k:n) */ + + if (kp != kk) { +/* (1) Swap columnar parts */ + if (kp < *n) { + i__1 = *n - kp; + zswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + + kp * a_dim1], &c__1); + } +/* (2) Swap and conjugate middle parts */ + i__1 = kp - 1; + for (j = kk + 1; j <= i__1; ++j) { + d_cnjg(&z__1, &a[j + kk * a_dim1]); + t.r = z__1.r, t.i = z__1.i; + i__2 = j + kk * a_dim1; + d_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L45: */ + } +/* (3) Swap and conjugate corner elements at row-col interserction */ + i__1 = kp + kk * a_dim1; + d_cnjg(&z__1, &a[kp + kk * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; +/* (4) Swap diagonal elements at row-col intersection */ + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.; + + if (kstep == 2) { +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; +/* (5) Swap row elements */ + i__1 = k + 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k + 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } else { +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } + } + +/* Update the trailing submatrix */ + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k of A now holds */ + +/* W(k) = L(k)*D(k), */ + +/* where L(k) is the k-th column of L */ + + if (k < *n) { + +/* Perform a rank-1 update of A(k+1:n,k+1:n) and */ +/* store L(k) in column k */ + +/* Handle division by a small number */ + + i__1 = k + k * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) >= sfmin) { + +/* Perform a rank-1 update of A(k+1:n,k+1:n) as */ +/* A := A - L(k)*D(k)*L(k)**T */ +/* = A - W(k)*(1/D(k))*W(k)**T */ + + i__1 = k + k * a_dim1; + d11 = 1. / a[i__1].r; + i__1 = *n - k; + d__1 = -d11; + zher_(uplo, &i__1, &d__1, &a[k + 1 + k * a_dim1], & + c__1, &a[k + 1 + (k + 1) * a_dim1], lda); + +/* Store L(k) in column k */ + + i__1 = *n - k; + zdscal_(&i__1, &d11, &a[k + 1 + k * a_dim1], &c__1); + } else { + +/* Store L(k) in column k */ + + i__1 = k + k * a_dim1; + d11 = a[i__1].r; + i__1 = *n; + for (ii = k + 1; ii <= i__1; ++ii) { + i__2 = ii + k * a_dim1; + i__3 = ii + k * a_dim1; + z__1.r = a[i__3].r / d11, z__1.i = a[i__3].i / + d11; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L46: */ + } + +/* Perform a rank-1 update of A(k+1:n,k+1:n) as */ +/* A := A - L(k)*D(k)*L(k)**T */ +/* = A - W(k)*(1/D(k))*W(k)**T */ +/* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T */ + + i__1 = *n - k; + d__1 = -d11; + zher_(uplo, &i__1, &d__1, &a[k + 1 + k * a_dim1], & + c__1, &a[k + 1 + (k + 1) * a_dim1], lda); + } + } + + } else { + +/* 2-by-2 pivot block D(k): columns k and k+1 now hold */ + +/* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) */ + +/* where L(k) and L(k+1) are the k-th and (k+1)-th columns */ +/* of L */ + + +/* Perform a rank-2 update of A(k+2:n,k+2:n) as */ + +/* A := A - ( L(k) L(k+1) ) * D(k) * ( L(k) L(k+1) )**T */ +/* = A - ( ( A(k)A(k+1) )*inv(D(k) ) * ( A(k)A(k+1) )**T */ + +/* and store L(k) and L(k+1) in columns k and k+1 */ + + if (k < *n - 1) { +/* D = |A21| */ + i__1 = k + 1 + k * a_dim1; + d__1 = a[i__1].r; + d__2 = d_imag(&a[k + 1 + k * a_dim1]); + d__ = dlapy2_(&d__1, &d__2); + i__1 = k + 1 + (k + 1) * a_dim1; + d11 = a[i__1].r / d__; + i__1 = k + k * a_dim1; + d22 = a[i__1].r / d__; + i__1 = k + 1 + k * a_dim1; + z__1.r = a[i__1].r / d__, z__1.i = a[i__1].i / d__; + d21.r = z__1.r, d21.i = z__1.i; + tt = 1. / (d11 * d22 - 1.); + + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + +/* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J */ + + i__2 = j + k * a_dim1; + z__3.r = d11 * a[i__2].r, z__3.i = d11 * a[i__2].i; + i__3 = j + (k + 1) * a_dim1; + z__4.r = d21.r * a[i__3].r - d21.i * a[i__3].i, + z__4.i = d21.r * a[i__3].i + d21.i * a[i__3] + .r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = tt * z__2.r, z__1.i = tt * z__2.i; + wk.r = z__1.r, wk.i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + z__3.r = d22 * a[i__2].r, z__3.i = d22 * a[i__2].i; + d_cnjg(&z__5, &d21); + i__3 = j + k * a_dim1; + z__4.r = z__5.r * a[i__3].r - z__5.i * a[i__3].i, + z__4.i = z__5.r * a[i__3].i + z__5.i * a[i__3] + .r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = tt * z__2.r, z__1.i = tt * z__2.i; + wkp1.r = z__1.r, wkp1.i = z__1.i; + +/* Perform a rank-2 update of A(k+2:n,k+2:n) */ + + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__ + k * a_dim1; + z__4.r = a[i__5].r / d__, z__4.i = a[i__5].i / + d__; + d_cnjg(&z__5, &wk); + z__3.r = z__4.r * z__5.r - z__4.i * z__5.i, + z__3.i = z__4.r * z__5.i + z__4.i * + z__5.r; + z__2.r = a[i__4].r - z__3.r, z__2.i = a[i__4].i - + z__3.i; + i__6 = i__ + (k + 1) * a_dim1; + z__7.r = a[i__6].r / d__, z__7.i = a[i__6].i / + d__; + d_cnjg(&z__8, &wkp1); + z__6.r = z__7.r * z__8.r - z__7.i * z__8.i, + z__6.i = z__7.r * z__8.i + z__7.i * + z__8.r; + z__1.r = z__2.r - z__6.r, z__1.i = z__2.i - + z__6.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L50: */ + } + +/* Store L(k) and L(k+1) in cols k and k+1 for row J */ + + i__2 = j + k * a_dim1; + z__1.r = wk.r / d__, z__1.i = wk.i / d__; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + z__1.r = wkp1.r / d__, z__1.i = wkp1.i / d__; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* (*) Make sure that diagonal element of pivot is real */ + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + z__1.r = d__1, z__1.i = 0.; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + +/* L60: */ + } + + } + + } + + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -p; + ipiv[k + 1] = -kp; + } + +/* Increase K and return to the start of the main loop */ + + k += kstep; + goto L40; + + } + +L70: + + return 0; + +/* End of ZHETF2_ROOK */ + +} /* zhetf2_rook__ */ + diff --git a/lapack-netlib/SRC/zhetrd.c b/lapack-netlib/SRC/zhetrd.c new file mode 100644 index 000000000..083ef8e91 --- /dev/null +++ b/lapack-netlib/SRC/zhetrd.c @@ -0,0 +1,815 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHETRD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHETRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LWORK, N */ +/* DOUBLE PRECISION D( * ), E( * ) */ +/* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHETRD reduces a complex Hermitian matrix A to real symmetric */ +/* > tridiagonal form T by a unitary similarity transformation: */ +/* > Q**H * A * Q = T. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > On exit, if UPLO = 'U', the diagonal and first superdiagonal */ +/* > of A are overwritten by the corresponding elements of the */ +/* > tridiagonal matrix T, and the elements above the first */ +/* > superdiagonal, with the array TAU, represent the unitary */ +/* > matrix Q as a product of elementary reflectors; if UPLO */ +/* > = 'L', the diagonal and first subdiagonal of A are over- */ +/* > written by the corresponding elements of the tridiagonal */ +/* > matrix T, and the elements below the first subdiagonal, with */ +/* > the array TAU, represent the unitary matrix Q as a product */ +/* > of elementary reflectors. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > The diagonal elements of the tridiagonal matrix T: */ +/* > D(i) = A(i,i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > The off-diagonal elements of the tridiagonal matrix T: */ +/* > E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (N-1) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= 1. */ +/* > For optimum performance LWORK >= N*NB, where NB is the */ +/* > optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16HEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > If UPLO = 'U', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(n-1) . . . H(2) H(1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */ +/* > A(1:i-1,i+1), and tau in TAU(i). */ +/* > */ +/* > If UPLO = 'L', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(n-1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */ +/* > and tau in TAU(i). */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples */ +/* > with n = 5: */ +/* > */ +/* > if UPLO = 'U': if UPLO = 'L': */ +/* > */ +/* > ( d e v2 v3 v4 ) ( d ) */ +/* > ( d e v3 v4 ) ( e d ) */ +/* > ( d e v4 ) ( v1 e d ) */ +/* > ( d e ) ( v1 v2 e d ) */ +/* > ( d ) ( v1 v2 v3 e d ) */ +/* > */ +/* > where d and e denote diagonal and off-diagonal elements of T, and vi */ +/* > denotes an element of the vector defining H(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zhetrd_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, + doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + + /* Local variables */ + integer i__, j; + extern logical lsame_(char *, char *); + integer nbmin, iinfo; + logical upper; + extern /* Subroutine */ int zhetd2_(char *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublecomplex *, integer *), zher2k_(char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublereal *, doublecomplex *, integer *); + integer nb, kk, nx; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlatrd_(char *, integer *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *, + doublecomplex *, integer *); + integer ldwork, lwkopt; + logical lquery; + integer iws; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --d__; + --e; + --tau; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*lwork < 1 && ! lquery) { + *info = -9; + } + + if (*info == 0) { + +/* Determine the block size. */ + + nb = ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *n * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHETRD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nx = *n; + iws = 1; + if (nb > 1 && nb < *n) { + +/* Determine when to cross over from blocked to unblocked code */ +/* (last block is always handled by unblocked code). */ + +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__3, "ZHETRD", uplo, n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < *n) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: determine the */ +/* minimum value of NB, and reduce NB or force use of */ +/* unblocked code by setting NX = N. */ + +/* Computing MAX */ + i__1 = *lwork / ldwork; + nb = f2cmax(i__1,1); + nbmin = ilaenv_(&c__2, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + if (nb < nbmin) { + nx = *n; + } + } + } else { + nx = *n; + } + } else { + nb = 1; + } + + if (upper) { + +/* Reduce the upper triangle of A. */ +/* Columns 1:kk are handled by the unblocked method. */ + + kk = *n - (*n - nx + nb - 1) / nb * nb; + i__1 = kk + 1; + i__2 = -nb; + for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { + +/* Reduce columns i:i+nb-1 to tridiagonal form and form the */ +/* matrix W which is needed to update the unreduced part of */ +/* the matrix */ + + i__3 = i__ + nb - 1; + zlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + work[1], &ldwork); + +/* Update the unreduced submatrix A(1:i-1,1:i-1), using an */ +/* update of the form: A := A - V*W**H - W*V**H */ + + i__3 = i__ - 1; + z__1.r = -1., z__1.i = 0.; + zher2k_(uplo, "No transpose", &i__3, &nb, &z__1, &a[i__ * a_dim1 + + 1], lda, &work[1], &ldwork, &c_b23, &a[a_offset], lda); + +/* Copy superdiagonal elements back into A, and diagonal */ +/* elements into D */ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j - 1 + j * a_dim1; + i__5 = j - 1; + a[i__4].r = e[i__5], a[i__4].i = 0.; + i__4 = j; + i__5 = j + j * a_dim1; + d__[i__4] = a[i__5].r; +/* L10: */ + } +/* L20: */ + } + +/* Use unblocked code to reduce the last or only block */ + + zhetd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); + } else { + +/* Reduce the lower triangle of A */ + + i__2 = *n - nx; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + +/* Reduce columns i:i+nb-1 to tridiagonal form and form the */ +/* matrix W which is needed to update the unreduced part of */ +/* the matrix */ + + i__3 = *n - i__ + 1; + zlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + tau[i__], &work[1], &ldwork); + +/* Update the unreduced submatrix A(i+nb:n,i+nb:n), using */ +/* an update of the form: A := A - V*W**H - W*V**H */ + + i__3 = *n - i__ - nb + 1; + z__1.r = -1., z__1.i = 0.; + zher2k_(uplo, "No transpose", &i__3, &nb, &z__1, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b23, &a[ + i__ + nb + (i__ + nb) * a_dim1], lda); + +/* Copy subdiagonal elements back into A, and diagonal */ +/* elements into D */ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + 1 + j * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.; + i__4 = j; + i__5 = j + j * a_dim1; + d__[i__4] = a[i__5].r; +/* L30: */ + } +/* L40: */ + } + +/* Use unblocked code to reduce the last or only block */ + + i__1 = *n - i__ + 1; + zhetd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + &tau[i__], &iinfo); + } + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZHETRD */ + +} /* zhetrd_ */ + diff --git a/lapack-netlib/SRC/zhetrd_2stage.c b/lapack-netlib/SRC/zhetrd_2stage.c new file mode 100644 index 000000000..e00660288 --- /dev/null +++ b/lapack-netlib/SRC/zhetrd_2stage.c @@ -0,0 +1,746 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHETRD_2STAGE */ + +/* @precisions fortran z -> s d c */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHETRD_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHETRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, */ +/* HOUS2, LHOUS2, WORK, LWORK, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER VECT, UPLO */ +/* INTEGER N, LDA, LWORK, LHOUS2, INFO */ +/* DOUBLE PRECISION D( * ), E( * ) */ +/* COMPLEX*16 A( LDA, * ), TAU( * ), */ +/* HOUS2( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHETRD_2STAGE reduces a complex Hermitian matrix A to real symmetric */ +/* > tridiagonal form T by a unitary similarity transformation: */ +/* > Q1**H Q2**H* A * Q2 * Q1 = T. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] VECT */ +/* > \verbatim */ +/* > VECT is CHARACTER*1 */ +/* > = 'N': No need for the Housholder representation, */ +/* > in particular for the second stage (Band to */ +/* > tridiagonal) and thus LHOUS2 is of size f2cmax(1, 4*N); */ +/* > = 'V': the Householder representation is needed to */ +/* > either generate Q1 Q2 or to apply Q1 Q2, */ +/* > then LHOUS2 is to be queried and computed. */ +/* > (NOT AVAILABLE IN THIS RELEASE). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > On exit, if UPLO = 'U', the band superdiagonal */ +/* > of A are overwritten by the corresponding elements of the */ +/* > internal band-diagonal matrix AB, and the elements above */ +/* > the KD superdiagonal, with the array TAU, represent the unitary */ +/* > matrix Q1 as a product of elementary reflectors; if UPLO */ +/* > = 'L', the diagonal and band subdiagonal of A are over- */ +/* > written by the corresponding elements of the internal band-diagonal */ +/* > matrix AB, and the elements below the KD subdiagonal, with */ +/* > the array TAU, represent the unitary matrix Q1 as a product */ +/* > of elementary reflectors. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > The diagonal elements of the tridiagonal matrix T. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > The off-diagonal elements of the tridiagonal matrix T. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (N-KD) */ +/* > The scalar factors of the elementary reflectors of */ +/* > the first stage (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] HOUS2 */ +/* > \verbatim */ +/* > HOUS2 is COMPLEX*16 array, dimension (LHOUS2) */ +/* > Stores the Householder representation of the stage2 */ +/* > band to tridiagonal. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LHOUS2 */ +/* > \verbatim */ +/* > LHOUS2 is INTEGER */ +/* > The dimension of the array HOUS2. */ +/* > If LWORK = -1, or LHOUS2 = -1, */ +/* > then a query is assumed; the routine */ +/* > only calculates the optimal size of the HOUS2 array, returns */ +/* > this value as the first entry of the HOUS2 array, and no error */ +/* > message related to LHOUS2 is issued by XERBLA. */ +/* > If VECT='N', LHOUS2 = f2cmax(1, 4*n); */ +/* > if VECT='V', option not yet available. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK = MAX(1, dimension) */ +/* > If LWORK = -1, or LHOUS2=-1, */ +/* > then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > LWORK = MAX(1, dimension) where */ +/* > dimension = f2cmax(stage1,stage2) + (KD+1)*N */ +/* > = N*KD + N*f2cmax(KD+1,FACTOPTNB) */ +/* > + f2cmax(2*KD*KD, KD*NTHREADS) */ +/* > + (KD+1)*N */ +/* > where KD is the blocking size of the reduction, */ +/* > FACTOPTNB is the blocking used by the QR or LQ */ +/* > algorithm, usually FACTOPTNB=128 is a good choice */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complex16HEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Implemented by Azzam Haidar. */ +/* > */ +/* > All details are available on technical report, SC11, SC13 papers. */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zhetrd_2stage_(char *vect, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, + doublecomplex *tau, doublecomplex *hous2, integer *lhous2, + doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + + /* Local variables */ + integer ldab; + extern /* Subroutine */ int zhetrd_he2hb_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, integer *); + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + extern /* Subroutine */ int zhetrd_hb2st_(char *, char *, char *, + integer *, integer *, doublecomplex *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *); + integer lwrk, wpos; + extern logical lsame_(char *, char *); + integer abpos, lhmin, lwmin; + logical wantq, upper; + integer ib, kd; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical lquery; + + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --d__; + --e; + --tau; + --hous2; + --work; + + /* Function Body */ + *info = 0; + wantq = lsame_(vect, "V"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1 || *lhous2 == -1; + +/* Determine the block size, the workspace size and the hous size. */ + + kd = ilaenv2stage_(&c__1, "ZHETRD_2STAGE", vect, n, &c_n1, &c_n1, &c_n1); + ib = ilaenv2stage_(&c__2, "ZHETRD_2STAGE", vect, n, &kd, &c_n1, &c_n1); + lhmin = ilaenv2stage_(&c__3, "ZHETRD_2STAGE", vect, n, &kd, &ib, &c_n1); + lwmin = ilaenv2stage_(&c__4, "ZHETRD_2STAGE", vect, n, &kd, &ib, &c_n1); +/* WRITE(*,*),'ZHETRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO, */ +/* $ LHMIN, LWMIN */ + + if (! lsame_(vect, "N")) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*lhous2 < lhmin && ! lquery) { + *info = -10; + } else if (*lwork < lwmin && ! lquery) { + *info = -12; + } + + if (*info == 0) { + hous2[1].r = (doublereal) lhmin, hous2[1].i = 0.; + work[1].r = (doublereal) lwmin, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHETRD_2STAGE", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + +/* Determine pointer position */ + + ldab = kd + 1; + lwrk = *lwork - ldab * *n; + abpos = 1; + wpos = abpos + ldab * *n; + zhetrd_he2hb_(uplo, n, &kd, &a[a_offset], lda, &work[abpos], &ldab, &tau[ + 1], &work[wpos], &lwrk, info); + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHETRD_HE2HB", &i__1, (ftnlen)12); + return 0; + } + zhetrd_hb2st_("Y", vect, uplo, n, &kd, &work[abpos], &ldab, &d__[1], &e[ + 1], &hous2[1], lhous2, &work[wpos], &lwrk, info); + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHETRD_HB2ST", &i__1, (ftnlen)12); + return 0; + } + + + hous2[1].r = (doublereal) lhmin, hous2[1].i = 0.; + work[1].r = (doublereal) lwmin, work[1].i = 0.; + return 0; + +/* End of ZHETRD_2STAGE */ + +} /* zhetrd_2stage__ */ + diff --git a/lapack-netlib/SRC/zhetrd_hb2st.c b/lapack-netlib/SRC/zhetrd_hb2st.c new file mode 100644 index 000000000..745bb37a2 --- /dev/null +++ b/lapack-netlib/SRC/zhetrd_hb2st.c @@ -0,0 +1,999 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) ceil(w) +#define myhuge_(w) HUGE_VAL +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric tridiagonal form T */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHETRD_HB2ST + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, */ +/* D, E, HOUS, LHOUS, WORK, LWORK, INFO ) */ + +/* #if defined(_OPENMP) */ +/* use omp_lib */ +/* #endif */ + +/* IMPLICIT NONE */ + +/* CHARACTER STAGE1, UPLO, VECT */ +/* INTEGER N, KD, IB, LDAB, LHOUS, LWORK, INFO */ +/* DOUBLE PRECISION D( * ), E( * ) */ +/* COMPLEX*16 AB( LDAB, * ), HOUS( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric */ +/* > tridiagonal form T by a unitary similarity transformation: */ +/* > Q**H * A * Q = T. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] STAGE1 */ +/* > \verbatim */ +/* > STAGE1 is CHARACTER*1 */ +/* > = 'N': "No": to mention that the stage 1 of the reduction */ +/* > from dense to band using the zhetrd_he2hb routine */ +/* > was not called before this routine to reproduce AB. */ +/* > In other term this routine is called as standalone. */ +/* > = 'Y': "Yes": to mention that the stage 1 of the */ +/* > reduction from dense to band using the zhetrd_he2hb */ +/* > routine has been called to produce AB (e.g., AB is */ +/* > the output of zhetrd_he2hb. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VECT */ +/* > \verbatim */ +/* > VECT is CHARACTER*1 */ +/* > = 'N': No need for the Housholder representation, */ +/* > and thus LHOUS is of size f2cmax(1, 4*N); */ +/* > = 'V': the Householder representation is needed to */ +/* > either generate or to apply Q later on, */ +/* > then LHOUS is to be queried and computed. */ +/* > (NOT AVAILABLE IN THIS RELEASE). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX*16 array, dimension (LDAB,N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > On exit, the diagonal elements of AB are overwritten by the */ +/* > diagonal elements of the tridiagonal matrix T; if KD > 0, the */ +/* > elements on the first superdiagonal (if UPLO = 'U') or the */ +/* > first subdiagonal (if UPLO = 'L') are overwritten by the */ +/* > off-diagonal elements of T; the rest of AB is overwritten by */ +/* > values generated during the reduction. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > The diagonal elements of the tridiagonal matrix T. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > The off-diagonal elements of the tridiagonal matrix T: */ +/* > E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] HOUS */ +/* > \verbatim */ +/* > HOUS is COMPLEX*16 array, dimension LHOUS, that */ +/* > store the Householder representation. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LHOUS */ +/* > \verbatim */ +/* > LHOUS is INTEGER */ +/* > The dimension of the array HOUS. LHOUS = MAX(1, dimension) */ +/* > If LWORK = -1, or LHOUS=-1, */ +/* > then a query is assumed; the routine */ +/* > only calculates the optimal size of the HOUS array, returns */ +/* > this value as the first entry of the HOUS array, and no error */ +/* > message related to LHOUS is issued by XERBLA. */ +/* > LHOUS = MAX(1, dimension) where */ +/* > dimension = 4*N if VECT='N' */ +/* > not available now if VECT='H' */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK = MAX(1, dimension) */ +/* > If LWORK = -1, or LHOUS=-1, */ +/* > then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > LWORK = MAX(1, dimension) where */ +/* > dimension = (2KD+1)*N + KD*NTHREADS */ +/* > where KD is the blocking size of the reduction, */ +/* > FACTOPTNB is the blocking used by the QR or LQ */ +/* > algorithm, usually FACTOPTNB=128 is a good choice */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complex16OTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Implemented by Azzam Haidar. */ +/* > */ +/* > All details are available on technical report, SC11, SC13 papers. */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zhetrd_hb2st_(char *stage1, char *vect, char *uplo, + integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublereal + *d__, doublereal *e, doublecomplex *hous, integer *lhous, + doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5; + real r__1; + doublecomplex z__1; + + /* Local variables */ + integer inda; + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + integer thed, myid, indw, apos, dpos, indv, abofdpos, nthreads, i__, k, m, + edind, debug; + extern logical lsame_(char *, char *); + integer lhmin, sizea, shift, stind, colpt, lwmin, awpos; + logical wantq, upper; + integer grsiz, sizev, ttype, stepercol, ed, ib, st, abdpos; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer thgrid, thgrnb, indtau; + doublereal abstmp; + integer ofdpos; + extern /* Subroutine */ int zhb2st_kernels_(char *, logical *, integer *, + integer *, integer *, integer *, integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), zlacpy_(char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *), zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *); + integer blklastind; + extern /* Subroutine */ int mecago_(); + logical lquery, afters1; + integer lda, tid, ldv; + doublecomplex tmp; + integer stt, sweepid, nbtiles, sizetau, thgrsiz; + + + + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Determine the minimal workspace size required. */ +/* Test the input parameters */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --d__; + --e; + --hous; + --work; + + /* Function Body */ + debug = 0; + *info = 0; + afters1 = lsame_(stage1, "Y"); + wantq = lsame_(vect, "V"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1 || *lhous == -1; + +/* Determine the block size, the workspace size and the hous size. */ + + ib = ilaenv2stage_(&c__2, "ZHETRD_HB2ST", vect, n, kd, &c_n1, &c_n1); + lhmin = ilaenv2stage_(&c__3, "ZHETRD_HB2ST", vect, n, kd, &ib, &c_n1); + lwmin = ilaenv2stage_(&c__4, "ZHETRD_HB2ST", vect, n, kd, &ib, &c_n1); + + if (! afters1 && ! lsame_(stage1, "N")) { + *info = -1; + } else if (! lsame_(vect, "N")) { + *info = -2; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*kd < 0) { + *info = -5; + } else if (*ldab < *kd + 1) { + *info = -7; + } else if (*lhous < lhmin && ! lquery) { + *info = -11; + } else if (*lwork < lwmin && ! lquery) { + *info = -13; + } + + if (*info == 0) { + hous[1].r = (doublereal) lhmin, hous[1].i = 0.; + work[1].r = (doublereal) lwmin, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHETRD_HB2ST", &i__1, (ftnlen)12); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + hous[1].r = 1., hous[1].i = 0.; + work[1].r = 1., work[1].i = 0.; + return 0; + } + +/* Determine pointer position */ + + ldv = *kd + ib; + sizetau = *n << 1; + sizev = *n << 1; + indtau = 1; + indv = indtau + sizetau; + lda = (*kd << 1) + 1; + sizea = lda * *n; + inda = 1; + indw = inda + sizea; + nthreads = 1; + tid = 0; + + if (upper) { + apos = inda + *kd; + awpos = inda; + dpos = apos + *kd; + ofdpos = dpos - 1; + abdpos = *kd + 1; + abofdpos = *kd; + } else { + apos = inda; + awpos = inda + *kd + 1; + dpos = apos; + ofdpos = dpos + 1; + abdpos = 1; + abofdpos = 2; + } + +/* Case KD=0: */ +/* The matrix is diagonal. We just copy it (convert to "real" for */ +/* complex because D is double and the imaginary part should be 0) */ +/* and store it in D. A sequential code here is better or */ +/* in a parallel environment it might need two cores for D and E */ + + if (*kd == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = abdpos + i__ * ab_dim1; + d__[i__] = ab[i__2].r; +/* L30: */ + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + e[i__] = 0.; +/* L40: */ + } + + hous[1].r = 1., hous[1].i = 0.; + work[1].r = 1., work[1].i = 0.; + return 0; + } + +/* Case KD=1: */ +/* The matrix is already Tridiagonal. We have to make diagonal */ +/* and offdiagonal elements real, and store them in D and E. */ +/* For that, for real precision just copy the diag and offdiag */ +/* to D and E while for the COMPLEX case the bulge chasing is */ +/* performed to convert the hermetian tridiagonal to symmetric */ +/* tridiagonal. A simpler coversion formula might be used, but then */ +/* updating the Q matrix will be required and based if Q is generated */ +/* or not this might complicate the story. */ + + if (*kd == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = abdpos + i__ * ab_dim1; + d__[i__] = ab[i__2].r; +/* L50: */ + } + +/* make off-diagonal elements real and copy them to E */ + + if (upper) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = abofdpos + (i__ + 1) * ab_dim1; + tmp.r = ab[i__2].r, tmp.i = ab[i__2].i; + abstmp = z_abs(&tmp); + i__2 = abofdpos + (i__ + 1) * ab_dim1; + ab[i__2].r = abstmp, ab[i__2].i = 0.; + e[i__] = abstmp; + if (abstmp != 0.) { + z__1.r = tmp.r / abstmp, z__1.i = tmp.i / abstmp; + tmp.r = z__1.r, tmp.i = z__1.i; + } else { + tmp.r = 1., tmp.i = 0.; + } + if (i__ < *n - 1) { + i__2 = abofdpos + (i__ + 2) * ab_dim1; + i__3 = abofdpos + (i__ + 2) * ab_dim1; + z__1.r = ab[i__3].r * tmp.r - ab[i__3].i * tmp.i, z__1.i = + ab[i__3].r * tmp.i + ab[i__3].i * tmp.r; + ab[i__2].r = z__1.r, ab[i__2].i = z__1.i; + } +/* IF( WANTZ ) THEN */ +/* CALL ZSCAL( N, DCONJG( TMP ), Q( 1, I+1 ), 1 ) */ +/* END IF */ +/* L60: */ + } + } else { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = abofdpos + i__ * ab_dim1; + tmp.r = ab[i__2].r, tmp.i = ab[i__2].i; + abstmp = z_abs(&tmp); + i__2 = abofdpos + i__ * ab_dim1; + ab[i__2].r = abstmp, ab[i__2].i = 0.; + e[i__] = abstmp; + if (abstmp != 0.) { + z__1.r = tmp.r / abstmp, z__1.i = tmp.i / abstmp; + tmp.r = z__1.r, tmp.i = z__1.i; + } else { + tmp.r = 1., tmp.i = 0.; + } + if (i__ < *n - 1) { + i__2 = abofdpos + (i__ + 1) * ab_dim1; + i__3 = abofdpos + (i__ + 1) * ab_dim1; + z__1.r = ab[i__3].r * tmp.r - ab[i__3].i * tmp.i, z__1.i = + ab[i__3].r * tmp.i + ab[i__3].i * tmp.r; + ab[i__2].r = z__1.r, ab[i__2].i = z__1.i; + } +/* IF( WANTQ ) THEN */ +/* CALL ZSCAL( N, TMP, Q( 1, I+1 ), 1 ) */ +/* END IF */ +/* L70: */ + } + } + + hous[1].r = 1., hous[1].i = 0.; + work[1].r = 1., work[1].i = 0.; + return 0; + } + +/* Main code start here. */ +/* Reduce the hermitian band of A to a tridiagonal matrix. */ + + thgrsiz = *n; + grsiz = 1; + shift = 3; + r__1 = (real) (*n) / (real) (*kd) + .5f; + nbtiles = r_int(&r__1); + r__1 = (real) shift / (real) grsiz + .5f; + stepercol = r_int(&r__1); + r__1 = (real) (*n - 1) / (real) thgrsiz + .5f; + thgrnb = r_int(&r__1); + + i__1 = *kd + 1; + zlacpy_("A", &i__1, n, &ab[ab_offset], ldab, &work[apos], &lda) + ; + zlaset_("A", kd, n, &c_b1, &c_b1, &work[awpos], &lda); + + +/* openMP parallelisation start here */ + + +/* main bulge chasing loop */ + + i__1 = thgrnb; + for (thgrid = 1; thgrid <= i__1; ++thgrid) { + stt = (thgrid - 1) * thgrsiz + 1; +/* Computing MIN */ + i__2 = stt + thgrsiz - 1, i__3 = *n - 1; + thed = f2cmin(i__2,i__3); + i__2 = *n - 1; + for (i__ = stt; i__ <= i__2; ++i__) { + ed = f2cmin(i__,thed); + if (stt > ed) { + myexit_(); + } + i__3 = stepercol; + for (m = 1; m <= i__3; ++m) { + st = stt; + i__4 = ed; + for (sweepid = st; sweepid <= i__4; ++sweepid) { + i__5 = grsiz; + for (k = 1; k <= i__5; ++k) { + myid = (i__ - sweepid) * (stepercol * grsiz) + (m - 1) + * grsiz + k; + if (myid == 1) { + ttype = 1; + } else { + ttype = myid % 2 + 2; + } + if (ttype == 2) { + colpt = myid / 2 * *kd + sweepid; + stind = colpt - *kd + 1; + edind = f2cmin(colpt,*n); + blklastind = colpt; + } else { + colpt = (myid + 1) / 2 * *kd + sweepid; + stind = colpt - *kd + 1; + edind = f2cmin(colpt,*n); + if (stind >= edind - 1 && edind == *n) { + blklastind = *n; + } else { + blklastind = 0; + } + } + +/* Call the kernel */ + + zhb2st_kernels_(uplo, &wantq, &ttype, &stind, &edind, + &sweepid, n, kd, &ib, &work[inda], &lda, & + hous[indv], &hous[indtau], &ldv, &work[indw + + tid * *kd]); + if (blklastind >= *n - 1) { + ++stt; + myexit_(); + } +/* L140: */ + } +/* L130: */ + } +/* L120: */ + } +/* L110: */ + } +/* L100: */ + } + + +/* Copy the diagonal from A to D. Note that D is REAL thus only */ +/* the Real part is needed, the imaginary part should be zero. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = dpos + (i__ - 1) * lda; + d__[i__] = work[i__2].r; +/* L150: */ + } + +/* Copy the off diagonal from A to E. Note that E is REAL thus only */ +/* the Real part is needed, the imaginary part should be zero. */ + + if (upper) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ofdpos + i__ * lda; + e[i__] = work[i__2].r; +/* L160: */ + } + } else { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ofdpos + (i__ - 1) * lda; + e[i__] = work[i__2].r; +/* L170: */ + } + } + + hous[1].r = (doublereal) lhmin, hous[1].i = 0.; + work[1].r = (doublereal) lwmin, work[1].i = 0.; + return 0; + +/* End of ZHETRD_HB2ST */ + +} /* zhetrd_hb2st__ */ + diff --git a/lapack-netlib/SRC/zhetrd_he2hb.c b/lapack-netlib/SRC/zhetrd_he2hb.c new file mode 100644 index 000000000..942dbc9ed --- /dev/null +++ b/lapack-netlib/SRC/zhetrd_he2hb.c @@ -0,0 +1,966 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZHETRD_HE2HB */ + +/* @precisions fortran z -> s d c */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHETRD_HE2HB + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, */ +/* WORK, LWORK, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LDAB, LWORK, N, KD */ +/* COMPLEX*16 A( LDA, * ), AB( LDAB, * ), */ +/* TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian */ +/* > band-diagonal form AB by a unitary similarity transformation: */ +/* > Q**H * A * Q = AB. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the reduced matrix if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > The reduced matrix is stored in the array AB. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > On exit, if UPLO = 'U', the diagonal and first superdiagonal */ +/* > of A are overwritten by the corresponding elements of the */ +/* > tridiagonal matrix T, and the elements above the first */ +/* > superdiagonal, with the array TAU, represent the unitary */ +/* > matrix Q as a product of elementary reflectors; if UPLO */ +/* > = 'L', the diagonal and first subdiagonal of A are over- */ +/* > written by the corresponding elements of the tridiagonal */ +/* > matrix T, and the elements below the first subdiagonal, with */ +/* > the array TAU, represent the unitary matrix Q as a product */ +/* > of elementary reflectors. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX*16 array, dimension (LDAB,N) */ +/* > On exit, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (N-KD) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (LWORK) */ +/* > On exit, if INFO = 0, or if LWORK=-1, */ +/* > WORK(1) returns the size of LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK which should be calculated */ +/* > by a workspace query. LWORK = MAX(1, LWORK_QUERY) */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > LWORK_QUERY = N*KD + N*f2cmax(KD,FACTOPTNB) + 2*KD*KD */ +/* > where FACTOPTNB is the blocking used by the QR or LQ */ +/* > algorithm, usually FACTOPTNB=128 is a good choice otherwise */ +/* > putting LWORK=-1 will provide the size of WORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complex16HEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Implemented by Azzam Haidar. */ +/* > */ +/* > All details are available on technical report, SC11, SC13 papers. */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ +/* > */ +/* > \verbatim */ +/* > */ +/* > If UPLO = 'U', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd. */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in */ +/* > A(i,i+kd+1:n), and tau in TAU(i). */ +/* > */ +/* > If UPLO = 'L', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = n-kd. */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in */ +/* > A(i+kd+2:n,i), and tau in TAU(i). */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples */ +/* > with n = 5: */ +/* > */ +/* > if UPLO = 'U': if UPLO = 'L': */ +/* > */ +/* > ( ab ab/v1 v1 v1 v1 ) ( ab ) */ +/* > ( ab ab/v2 v2 v2 ) ( ab/v1 ab ) */ +/* > ( ab ab/v3 v3 ) ( v1 ab/v2 ab ) */ +/* > ( ab ab/v4 ) ( v1 v2 ab/v3 ab ) */ +/* > ( ab ) ( v1 v2 v3 ab/v4 ab ) */ +/* > */ +/* > where d and e denote diagonal and off-diagonal elements of T, and vi */ +/* > denotes an element of the vector defining H(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zhetrd_he2hb_(char *uplo, integer *n, integer *kd, + doublecomplex *a, integer *lda, doublecomplex *ab, integer *ldab, + doublecomplex *tau, doublecomplex *work, integer *lwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, ab_dim1, ab_offset, i__1, i__2, i__3, i__4, + i__5; + doublecomplex z__1; + + /* Local variables */ + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + integer tpos, wpos, s1pos, s2pos, i__, j; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), zhemm_(char *, char *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + integer lwmin; + logical upper; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zher2k_(char *, char *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *, + integer *); + integer lk, pk, pn, lt, lw; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zgelqf_( + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, integer *), zgeqrf_(integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, integer *), zlarft_(char *, char *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *), zlaset_(char *, + integer *, integer *, doublecomplex *, doublecomplex *, + doublecomplex *, integer *); + integer ls1; + logical lquery; + integer ls2, ldt, ldw, lds1, lds2; + + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Determine the minimal workspace size required */ +/* and test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + lwmin = ilaenv2stage_(&c__4, "ZHETRD_HE2HB", "", n, kd, &c_n1, &c_n1); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kd < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = *kd + 1; + if (*ldab < f2cmax(i__1,i__2)) { + *info = -7; + } else if (*lwork < lwmin && ! lquery) { + *info = -10; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHETRD_HE2HB", &i__1, (ftnlen)12); + return 0; + } else if (lquery) { + work[1].r = (doublereal) lwmin, work[1].i = 0.; + return 0; + } + +/* Quick return if possible */ +/* Copy the upper/lower portion of A into AB */ + + if (*n <= *kd + 1) { + if (upper) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MIN */ + i__2 = *kd + 1; + lk = f2cmin(i__2,i__); + zcopy_(&lk, &a[i__ - lk + 1 + i__ * a_dim1], &c__1, &ab[*kd + + 1 - lk + 1 + i__ * ab_dim1], &c__1); +/* L100: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MIN */ + i__2 = *kd + 1, i__3 = *n - i__ + 1; + lk = f2cmin(i__2,i__3); + zcopy_(&lk, &a[i__ + i__ * a_dim1], &c__1, &ab[i__ * ab_dim1 + + 1], &c__1); +/* L110: */ + } + } + work[1].r = 1., work[1].i = 0.; + return 0; + } + +/* Determine the pointer position for the workspace */ + + ldt = *kd; + lds1 = *kd; + lt = ldt * *kd; + lw = *n * *kd; + ls1 = lds1 * *kd; + ls2 = lwmin - lt - lw - ls1; +/* LS2 = N*MAX(KD,FACTOPTNB) */ + tpos = 1; + wpos = tpos + lt; + s1pos = wpos + lw; + s2pos = s1pos + ls1; + if (upper) { + ldw = *kd; + lds2 = *kd; + } else { + ldw = *n; + lds2 = *n; + } + + +/* Set the workspace of the triangular matrix T to zero once such a */ +/* way every time T is generated the upper/lower portion will be always zero */ + + zlaset_("A", &ldt, kd, &c_b1, &c_b1, &work[tpos], &ldt); + + if (upper) { + i__1 = *n - *kd; + i__2 = *kd; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + pn = *n - i__ - *kd + 1; +/* Computing MIN */ + i__3 = *n - i__ - *kd + 1; + pk = f2cmin(i__3,*kd); + +/* Compute the LQ factorization of the current block */ + + zgelqf_(kd, &pn, &a[i__ + (i__ + *kd) * a_dim1], lda, &tau[i__], & + work[s2pos], &ls2, &iinfo); + +/* Copy the upper portion of A into AB */ + + i__3 = i__ + pk - 1; + for (j = i__; j <= i__3; ++j) { +/* Computing MIN */ + i__4 = *kd, i__5 = *n - j; + lk = f2cmin(i__4,i__5) + 1; + i__4 = *ldab - 1; + zcopy_(&lk, &a[j + j * a_dim1], lda, &ab[*kd + 1 + j * + ab_dim1], &i__4); +/* L20: */ + } + + zlaset_("Lower", &pk, &pk, &c_b1, &c_b2, &a[i__ + (i__ + *kd) * + a_dim1], lda); + +/* Form the matrix T */ + + zlarft_("Forward", "Rowwise", &pn, &pk, &a[i__ + (i__ + *kd) * + a_dim1], lda, &tau[i__], &work[tpos], &ldt); + +/* Compute W: */ + + zgemm_("Conjugate", "No transpose", &pk, &pn, &pk, &c_b2, &work[ + tpos], &ldt, &a[i__ + (i__ + *kd) * a_dim1], lda, &c_b1, & + work[s2pos], &lds2); + + zhemm_("Right", uplo, &pk, &pn, &c_b2, &a[i__ + *kd + (i__ + *kd) + * a_dim1], lda, &work[s2pos], &lds2, &c_b1, &work[wpos], & + ldw); + + zgemm_("No transpose", "Conjugate", &pk, &pk, &pn, &c_b2, &work[ + wpos], &ldw, &work[s2pos], &lds2, &c_b1, &work[s1pos], & + lds1); + + z__1.r = -.5, z__1.i = 0.; + zgemm_("No transpose", "No transpose", &pk, &pn, &pk, &z__1, & + work[s1pos], &lds1, &a[i__ + (i__ + *kd) * a_dim1], lda, & + c_b2, &work[wpos], &ldw); + + +/* Update the unreduced submatrix A(i+kd:n,i+kd:n), using */ +/* an update of the form: A := A - V'*W - W'*V */ + + z__1.r = -1., z__1.i = 0.; + zher2k_(uplo, "Conjugate", &pn, &pk, &z__1, &a[i__ + (i__ + *kd) * + a_dim1], lda, &work[wpos], &ldw, &c_b33, &a[i__ + *kd + ( + i__ + *kd) * a_dim1], lda); +/* L10: */ + } + +/* Copy the upper band to AB which is the band storage matrix */ + + i__2 = *n; + for (j = *n - *kd + 1; j <= i__2; ++j) { +/* Computing MIN */ + i__1 = *kd, i__3 = *n - j; + lk = f2cmin(i__1,i__3) + 1; + i__1 = *ldab - 1; + zcopy_(&lk, &a[j + j * a_dim1], lda, &ab[*kd + 1 + j * ab_dim1], & + i__1); +/* L30: */ + } + + } else { + +/* Reduce the lower triangle of A to lower band matrix */ + + i__2 = *n - *kd; + i__1 = *kd; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + pn = *n - i__ - *kd + 1; +/* Computing MIN */ + i__3 = *n - i__ - *kd + 1; + pk = f2cmin(i__3,*kd); + +/* Compute the QR factorization of the current block */ + + zgeqrf_(&pn, kd, &a[i__ + *kd + i__ * a_dim1], lda, &tau[i__], & + work[s2pos], &ls2, &iinfo); + +/* Copy the upper portion of A into AB */ + + i__3 = i__ + pk - 1; + for (j = i__; j <= i__3; ++j) { +/* Computing MIN */ + i__4 = *kd, i__5 = *n - j; + lk = f2cmin(i__4,i__5) + 1; + zcopy_(&lk, &a[j + j * a_dim1], &c__1, &ab[j * ab_dim1 + 1], & + c__1); +/* L50: */ + } + + zlaset_("Upper", &pk, &pk, &c_b1, &c_b2, &a[i__ + *kd + i__ * + a_dim1], lda); + +/* Form the matrix T */ + + zlarft_("Forward", "Columnwise", &pn, &pk, &a[i__ + *kd + i__ * + a_dim1], lda, &tau[i__], &work[tpos], &ldt); + +/* Compute W: */ + + zgemm_("No transpose", "No transpose", &pn, &pk, &pk, &c_b2, &a[ + i__ + *kd + i__ * a_dim1], lda, &work[tpos], &ldt, &c_b1, + &work[s2pos], &lds2); + + zhemm_("Left", uplo, &pn, &pk, &c_b2, &a[i__ + *kd + (i__ + *kd) * + a_dim1], lda, &work[s2pos], &lds2, &c_b1, &work[wpos], & + ldw); + + zgemm_("Conjugate", "No transpose", &pk, &pk, &pn, &c_b2, &work[ + s2pos], &lds2, &work[wpos], &ldw, &c_b1, &work[s1pos], & + lds1); + + z__1.r = -.5, z__1.i = 0.; + zgemm_("No transpose", "No transpose", &pn, &pk, &pk, &z__1, &a[ + i__ + *kd + i__ * a_dim1], lda, &work[s1pos], &lds1, & + c_b2, &work[wpos], &ldw); + + +/* Update the unreduced submatrix A(i+kd:n,i+kd:n), using */ +/* an update of the form: A := A - V*W' - W*V' */ + + z__1.r = -1., z__1.i = 0.; + zher2k_(uplo, "No transpose", &pn, &pk, &z__1, &a[i__ + *kd + i__ + * a_dim1], lda, &work[wpos], &ldw, &c_b33, &a[i__ + *kd + + (i__ + *kd) * a_dim1], lda); +/* ================================================================== */ +/* RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED */ +/* DO 45 J = I, I+PK-1 */ +/* LK = MIN( KD, N-J ) + 1 */ +/* CALL ZCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 ) */ +/* 45 CONTINUE */ +/* ================================================================== */ +/* L40: */ + } + +/* Copy the lower band to AB which is the band storage matrix */ + + i__1 = *n; + for (j = *n - *kd + 1; j <= i__1; ++j) { +/* Computing MIN */ + i__2 = *kd, i__3 = *n - j; + lk = f2cmin(i__2,i__3) + 1; + zcopy_(&lk, &a[j + j * a_dim1], &c__1, &ab[j * ab_dim1 + 1], & + c__1); +/* L60: */ + } + } + + work[1].r = (doublereal) lwmin, work[1].i = 0.; + return 0; + +/* End of ZHETRD_HE2HB */ + +} /* zhetrd_he2hb__ */ +