diff --git a/lapack-netlib/SRC/cggbal.c b/lapack-netlib/SRC/cggbal.c new file mode 100644 index 000000000..18a3b356c --- /dev/null +++ b/lapack-netlib/SRC/cggbal.c @@ -0,0 +1,1093 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CGGBAL */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGGBAL + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, */ +/* RSCALE, WORK, INFO ) */ + +/* CHARACTER JOB */ +/* INTEGER IHI, ILO, INFO, LDA, LDB, N */ +/* REAL LSCALE( * ), RSCALE( * ), WORK( * ) */ +/* COMPLEX A( LDA, * ), B( LDB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGGBAL balances a pair of general complex matrices (A,B). This */ +/* > involves, first, permuting A and B by similarity transformations to */ +/* > isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N */ +/* > elements on the diagonal; and second, applying a diagonal similarity */ +/* > transformation to rows and columns ILO to IHI to make the rows */ +/* > and columns as close in norm as possible. Both steps are optional. */ +/* > */ +/* > Balancing may reduce the 1-norm of the matrices, and improve the */ +/* > accuracy of the computed eigenvalues and/or eigenvectors in the */ +/* > generalized eigenvalue problem A*x = lambda*B*x. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOB */ +/* > \verbatim */ +/* > JOB is CHARACTER*1 */ +/* > Specifies the operations to be performed on A and B: */ +/* > = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 */ +/* > and RSCALE(I) = 1.0 for i=1,...,N; */ +/* > = 'P': permute only; */ +/* > = 'S': scale only; */ +/* > = 'B': both permute and scale. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the input matrix A. */ +/* > On exit, A is overwritten by the balanced matrix. */ +/* > If JOB = 'N', A is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,N) */ +/* > On entry, the input matrix B. */ +/* > On exit, B is overwritten by the balanced matrix. */ +/* > If JOB = 'N', B is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > ILO and IHI are set to integers such that on exit */ +/* > A(i,j) = 0 and B(i,j) = 0 if i > j and */ +/* > j = 1,...,ILO-1 or i = IHI+1,...,N. */ +/* > If JOB = 'N' or 'S', ILO = 1 and IHI = N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] LSCALE */ +/* > \verbatim */ +/* > LSCALE is REAL array, dimension (N) */ +/* > Details of the permutations and scaling factors applied */ +/* > to the left side of A and B. If P(j) is the index of the */ +/* > row interchanged with row j, and D(j) is the scaling factor */ +/* > applied to row j, then */ +/* > LSCALE(j) = P(j) for J = 1,...,ILO-1 */ +/* > = D(j) for J = ILO,...,IHI */ +/* > = P(j) for J = IHI+1,...,N. */ +/* > The order in which the interchanges are made is N to IHI+1, */ +/* > then 1 to ILO-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RSCALE */ +/* > \verbatim */ +/* > RSCALE is REAL array, dimension (N) */ +/* > Details of the permutations and scaling factors applied */ +/* > to the right side of A and B. If P(j) is the index of the */ +/* > column interchanged with column j, and D(j) is the scaling */ +/* > factor applied to column j, then */ +/* > RSCALE(j) = P(j) for J = 1,...,ILO-1 */ +/* > = D(j) for J = ILO,...,IHI */ +/* > = P(j) for J = IHI+1,...,N. */ +/* > The order in which the interchanges are made is N to IHI+1, */ +/* > then 1 to ILO-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (lwork) */ +/* > lwork must be at least f2cmax(1,6*N) when JOB = 'S' or 'B', and */ +/* > at least 1 when JOB = 'N' or 'P'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexGBcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > See R.C. WARD, Balancing the generalized eigenvalue problem, */ +/* > SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int cggbal_(char *job, integer *n, complex *a, integer *lda, + complex *b, integer *ldb, integer *ilo, integer *ihi, real *lscale, + real *rscale, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3; + + /* Local variables */ + integer lcab; + real beta, coef; + integer irab, lrab; + real basl, cmax; + extern real sdot_(integer *, real *, integer *, real *, integer *); + real coef2, coef5; + integer i__, j, k, l, m; + real gamma, t, alpha; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + real sfmin; + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + real sfmax; + integer iflow, kount; + extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, + real *, integer *); + integer jc; + real ta, tb, tc; + integer ir, it; + real ew; + integer nr; + real pgamma; + extern integer icamax_(integer *, complex *, integer *); + extern real slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), xerbla_(char *, integer *, ftnlen); + integer lsfmin, lsfmax, ip1, jp1, lm1; + real cab, rab, ewc, cor, sum; + integer nrp2, icab; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --lscale; + --rscale; + --work; + + /* Function Body */ + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*ldb < f2cmax(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGGBAL", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *ilo = 1; + *ihi = *n; + return 0; + } + + if (*n == 1) { + *ilo = 1; + *ihi = *n; + lscale[1] = 1.f; + rscale[1] = 1.f; + return 0; + } + + if (lsame_(job, "N")) { + *ilo = 1; + *ihi = *n; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + lscale[i__] = 1.f; + rscale[i__] = 1.f; +/* L10: */ + } + return 0; + } + + k = 1; + l = *n; + if (lsame_(job, "S")) { + goto L190; + } + + goto L30; + +/* Permute the matrices A and B to isolate the eigenvalues. */ + +/* Find row with one nonzero in columns 1 through L */ + +L20: + l = lm1; + if (l != 1) { + goto L30; + } + + rscale[1] = 1.f; + lscale[1] = 1.f; + goto L190; + +L30: + lm1 = l - 1; + for (i__ = l; i__ >= 1; --i__) { + i__1 = lm1; + for (j = 1; j <= i__1; ++j) { + jp1 = j + 1; + i__2 = i__ + j * a_dim1; + i__3 = i__ + j * b_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f || (b[i__3].r != 0.f || + b[i__3].i != 0.f)) { + goto L50; + } +/* L40: */ + } + j = l; + goto L70; + +L50: + i__1 = l; + for (j = jp1; j <= i__1; ++j) { + i__2 = i__ + j * a_dim1; + i__3 = i__ + j * b_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f || (b[i__3].r != 0.f || + b[i__3].i != 0.f)) { + goto L80; + } +/* L60: */ + } + j = jp1 - 1; + +L70: + m = l; + iflow = 1; + goto L160; +L80: + ; + } + goto L100; + +/* Find column with one nonzero in rows K through N */ + +L90: + ++k; + +L100: + i__1 = l; + for (j = k; j <= i__1; ++j) { + i__2 = lm1; + for (i__ = k; i__ <= i__2; ++i__) { + ip1 = i__ + 1; + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * b_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != 0.f || + b[i__4].i != 0.f)) { + goto L120; + } +/* L110: */ + } + i__ = l; + goto L140; +L120: + i__2 = l; + for (i__ = ip1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * b_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != 0.f || + b[i__4].i != 0.f)) { + goto L150; + } +/* L130: */ + } + i__ = ip1 - 1; +L140: + m = k; + iflow = 2; + goto L160; +L150: + ; + } + goto L190; + +/* Permute rows M and I */ + +L160: + lscale[m] = (real) i__; + if (i__ == m) { + goto L170; + } + i__1 = *n - k + 1; + cswap_(&i__1, &a[i__ + k * a_dim1], lda, &a[m + k * a_dim1], lda); + i__1 = *n - k + 1; + cswap_(&i__1, &b[i__ + k * b_dim1], ldb, &b[m + k * b_dim1], ldb); + +/* Permute columns M and J */ + +L170: + rscale[m] = (real) j; + if (j == m) { + goto L180; + } + cswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + cswap_(&l, &b[j * b_dim1 + 1], &c__1, &b[m * b_dim1 + 1], &c__1); + +L180: + switch (iflow) { + case 1: goto L20; + case 2: goto L90; + } + +L190: + *ilo = k; + *ihi = l; + + if (lsame_(job, "P")) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + lscale[i__] = 1.f; + rscale[i__] = 1.f; +/* L195: */ + } + return 0; + } + + if (*ilo == *ihi) { + return 0; + } + +/* Balance the submatrix in rows ILO to IHI. */ + + nr = *ihi - *ilo + 1; + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + rscale[i__] = 0.f; + lscale[i__] = 0.f; + + work[i__] = 0.f; + work[i__ + *n] = 0.f; + work[i__ + (*n << 1)] = 0.f; + work[i__ + *n * 3] = 0.f; + work[i__ + (*n << 2)] = 0.f; + work[i__ + *n * 5] = 0.f; +/* L200: */ + } + +/* Compute right side vector in resulting linear equations */ + + basl = r_lg10(&c_b36); + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + i__2 = *ihi; + for (j = *ilo; j <= i__2; ++j) { + i__3 = i__ + j * a_dim1; + if (a[i__3].r == 0.f && a[i__3].i == 0.f) { + ta = 0.f; + goto L210; + } + i__3 = i__ + j * a_dim1; + r__3 = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[i__ + j * + a_dim1]), abs(r__2)); + ta = r_lg10(&r__3) / basl; + +L210: + i__3 = i__ + j * b_dim1; + if (b[i__3].r == 0.f && b[i__3].i == 0.f) { + tb = 0.f; + goto L220; + } + i__3 = i__ + j * b_dim1; + r__3 = (r__1 = b[i__3].r, abs(r__1)) + (r__2 = r_imag(&b[i__ + j * + b_dim1]), abs(r__2)); + tb = r_lg10(&r__3) / basl; + +L220: + work[i__ + (*n << 2)] = work[i__ + (*n << 2)] - ta - tb; + work[j + *n * 5] = work[j + *n * 5] - ta - tb; +/* L230: */ + } +/* L240: */ + } + + coef = 1.f / (real) (nr << 1); + coef2 = coef * coef; + coef5 = coef2 * .5f; + nrp2 = nr + 2; + beta = 0.f; + it = 1; + +/* Start generalized conjugate gradient iteration */ + +L250: + + gamma = sdot_(&nr, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + (*n << 2)] + , &c__1) + sdot_(&nr, &work[*ilo + *n * 5], &c__1, &work[*ilo + * + n * 5], &c__1); + + ew = 0.f; + ewc = 0.f; + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + ew += work[i__ + (*n << 2)]; + ewc += work[i__ + *n * 5]; +/* L260: */ + } + +/* Computing 2nd power */ + r__1 = ew; +/* Computing 2nd power */ + r__2 = ewc; +/* Computing 2nd power */ + r__3 = ew - ewc; + gamma = coef * gamma - coef2 * (r__1 * r__1 + r__2 * r__2) - coef5 * ( + r__3 * r__3); + if (gamma == 0.f) { + goto L350; + } + if (it != 1) { + beta = gamma / pgamma; + } + t = coef5 * (ewc - ew * 3.f); + tc = coef5 * (ew - ewc * 3.f); + + sscal_(&nr, &beta, &work[*ilo], &c__1); + sscal_(&nr, &beta, &work[*ilo + *n], &c__1); + + saxpy_(&nr, &coef, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + *n], & + c__1); + saxpy_(&nr, &coef, &work[*ilo + *n * 5], &c__1, &work[*ilo], &c__1); + + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + work[i__] += tc; + work[i__ + *n] += t; +/* L270: */ + } + +/* Apply matrix to vector */ + + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + kount = 0; + sum = 0.f; + i__2 = *ihi; + for (j = *ilo; j <= i__2; ++j) { + i__3 = i__ + j * a_dim1; + if (a[i__3].r == 0.f && a[i__3].i == 0.f) { + goto L280; + } + ++kount; + sum += work[j]; +L280: + i__3 = i__ + j * b_dim1; + if (b[i__3].r == 0.f && b[i__3].i == 0.f) { + goto L290; + } + ++kount; + sum += work[j]; +L290: + ; + } + work[i__ + (*n << 1)] = (real) kount * work[i__ + *n] + sum; +/* L300: */ + } + + i__1 = *ihi; + for (j = *ilo; j <= i__1; ++j) { + kount = 0; + sum = 0.f; + i__2 = *ihi; + for (i__ = *ilo; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + if (a[i__3].r == 0.f && a[i__3].i == 0.f) { + goto L310; + } + ++kount; + sum += work[i__ + *n]; +L310: + i__3 = i__ + j * b_dim1; + if (b[i__3].r == 0.f && b[i__3].i == 0.f) { + goto L320; + } + ++kount; + sum += work[i__ + *n]; +L320: + ; + } + work[j + *n * 3] = (real) kount * work[j] + sum; +/* L330: */ + } + + sum = sdot_(&nr, &work[*ilo + *n], &c__1, &work[*ilo + (*n << 1)], &c__1) + + sdot_(&nr, &work[*ilo], &c__1, &work[*ilo + *n * 3], &c__1); + alpha = gamma / sum; + +/* Determine correction to current iteration */ + + cmax = 0.f; + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + cor = alpha * work[i__ + *n]; + if (abs(cor) > cmax) { + cmax = abs(cor); + } + lscale[i__] += cor; + cor = alpha * work[i__]; + if (abs(cor) > cmax) { + cmax = abs(cor); + } + rscale[i__] += cor; +/* L340: */ + } + if (cmax < .5f) { + goto L350; + } + + r__1 = -alpha; + saxpy_(&nr, &r__1, &work[*ilo + (*n << 1)], &c__1, &work[*ilo + (*n << 2)] + , &c__1); + r__1 = -alpha; + saxpy_(&nr, &r__1, &work[*ilo + *n * 3], &c__1, &work[*ilo + *n * 5], & + c__1); + + pgamma = gamma; + ++it; + if (it <= nrp2) { + goto L250; + } + +/* End generalized conjugate gradient iteration */ + +L350: + sfmin = slamch_("S"); + sfmax = 1.f / sfmin; + lsfmin = (integer) (r_lg10(&sfmin) / basl + 1.f); + lsfmax = (integer) (r_lg10(&sfmax) / basl); + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + i__2 = *n - *ilo + 1; + irab = icamax_(&i__2, &a[i__ + *ilo * a_dim1], lda); + rab = c_abs(&a[i__ + (irab + *ilo - 1) * a_dim1]); + i__2 = *n - *ilo + 1; + irab = icamax_(&i__2, &b[i__ + *ilo * b_dim1], ldb); +/* Computing MAX */ + r__1 = rab, r__2 = c_abs(&b[i__ + (irab + *ilo - 1) * b_dim1]); + rab = f2cmax(r__1,r__2); + r__1 = rab + sfmin; + lrab = (integer) (r_lg10(&r__1) / basl + 1.f); + ir = lscale[i__] + r_sign(&c_b72, &lscale[i__]); +/* Computing MIN */ + i__2 = f2cmax(ir,lsfmin), i__2 = f2cmin(i__2,lsfmax), i__3 = lsfmax - lrab; + ir = f2cmin(i__2,i__3); + lscale[i__] = pow_ri(&c_b36, &ir); + icab = icamax_(ihi, &a[i__ * a_dim1 + 1], &c__1); + cab = c_abs(&a[icab + i__ * a_dim1]); + icab = icamax_(ihi, &b[i__ * b_dim1 + 1], &c__1); +/* Computing MAX */ + r__1 = cab, r__2 = c_abs(&b[icab + i__ * b_dim1]); + cab = f2cmax(r__1,r__2); + r__1 = cab + sfmin; + lcab = (integer) (r_lg10(&r__1) / basl + 1.f); + jc = rscale[i__] + r_sign(&c_b72, &rscale[i__]); +/* Computing MIN */ + i__2 = f2cmax(jc,lsfmin), i__2 = f2cmin(i__2,lsfmax), i__3 = lsfmax - lcab; + jc = f2cmin(i__2,i__3); + rscale[i__] = pow_ri(&c_b36, &jc); +/* L360: */ + } + +/* Row scaling of matrices A and B */ + + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + i__2 = *n - *ilo + 1; + csscal_(&i__2, &lscale[i__], &a[i__ + *ilo * a_dim1], lda); + i__2 = *n - *ilo + 1; + csscal_(&i__2, &lscale[i__], &b[i__ + *ilo * b_dim1], ldb); +/* L370: */ + } + +/* Column scaling of matrices A and B */ + + i__1 = *ihi; + for (j = *ilo; j <= i__1; ++j) { + csscal_(ihi, &rscale[j], &a[j * a_dim1 + 1], &c__1); + csscal_(ihi, &rscale[j], &b[j * b_dim1 + 1], &c__1); +/* L380: */ + } + + return 0; + +/* End of CGGBAL */ + +} /* cggbal_ */ + diff --git a/lapack-netlib/SRC/cgges.c b/lapack-netlib/SRC/cgges.c new file mode 100644 index 000000000..c45ca7fd5 --- /dev/null +++ b/lapack-netlib/SRC/cgges.c @@ -0,0 +1,1069 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CGGES computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors f +or GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGGES + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, */ +/* SDIM, ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK, */ +/* LWORK, RWORK, BWORK, INFO ) */ + +/* CHARACTER JOBVSL, JOBVSR, SORT */ +/* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM */ +/* LOGICAL BWORK( * ) */ +/* REAL RWORK( * ) */ +/* COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), */ +/* $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), */ +/* $ WORK( * ) */ +/* LOGICAL SELCTG */ +/* EXTERNAL SELCTG */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGGES computes for a pair of N-by-N complex nonsymmetric matrices */ +/* > (A,B), the generalized eigenvalues, the generalized complex Schur */ +/* > form (S, T), and optionally left and/or right Schur vectors (VSL */ +/* > and VSR). This gives the generalized Schur factorization */ +/* > */ +/* > (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) */ +/* > */ +/* > where (VSR)**H is the conjugate-transpose of VSR. */ +/* > */ +/* > Optionally, it also orders the eigenvalues so that a selected cluster */ +/* > of eigenvalues appears in the leading diagonal blocks of the upper */ +/* > triangular matrix S and the upper triangular matrix T. The leading */ +/* > columns of VSL and VSR then form an unitary basis for the */ +/* > corresponding left and right eigenspaces (deflating subspaces). */ +/* > */ +/* > (If only the generalized eigenvalues are needed, use the driver */ +/* > CGGEV instead, which is faster.) */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar w */ +/* > or a ratio alpha/beta = w, such that A - w*B is singular. It is */ +/* > usually represented as the pair (alpha,beta), as there is a */ +/* > reasonable interpretation for beta=0, and even for both being zero. */ +/* > */ +/* > A pair of matrices (S,T) is in generalized complex Schur form if S */ +/* > and T are upper triangular and, in addition, the diagonal elements */ +/* > of T are non-negative real numbers. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVSL */ +/* > \verbatim */ +/* > JOBVSL is CHARACTER*1 */ +/* > = 'N': do not compute the left Schur vectors; */ +/* > = 'V': compute the left Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVSR */ +/* > \verbatim */ +/* > JOBVSR is CHARACTER*1 */ +/* > = 'N': do not compute the right Schur vectors; */ +/* > = 'V': compute the right Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SORT */ +/* > \verbatim */ +/* > SORT is CHARACTER*1 */ +/* > Specifies whether or not to order the eigenvalues on the */ +/* > diagonal of the generalized Schur form. */ +/* > = 'N': Eigenvalues are not ordered; */ +/* > = 'S': Eigenvalues are ordered (see SELCTG). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SELCTG */ +/* > \verbatim */ +/* > SELCTG is a LOGICAL FUNCTION of two COMPLEX arguments */ +/* > SELCTG must be declared EXTERNAL in the calling subroutine. */ +/* > If SORT = 'N', SELCTG is not referenced. */ +/* > If SORT = 'S', SELCTG is used to select eigenvalues to sort */ +/* > to the top left of the Schur form. */ +/* > An eigenvalue ALPHA(j)/BETA(j) is selected if */ +/* > SELCTG(ALPHA(j),BETA(j)) is true. */ +/* > */ +/* > Note that a selected complex eigenvalue may no longer satisfy */ +/* > SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since */ +/* > ordering may change the value of complex eigenvalues */ +/* > (especially if the eigenvalue is ill-conditioned), in this */ +/* > case INFO is set to N+2 (See INFO below). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VSL, and VSR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the first of the pair of matrices. */ +/* > On exit, A has been overwritten by its generalized Schur */ +/* > form S. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB, N) */ +/* > On entry, the second of the pair of matrices. */ +/* > On exit, B has been overwritten by its generalized Schur */ +/* > form T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SDIM */ +/* > \verbatim */ +/* > SDIM is INTEGER */ +/* > If SORT = 'N', SDIM = 0. */ +/* > If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* > for which SELCTG is true. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is COMPLEX array, dimension (N) */ +/* > On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the */ +/* > generalized eigenvalues. ALPHA(j), j=1,...,N and BETA(j), */ +/* > j=1,...,N are the diagonals of the complex Schur form (A,B) */ +/* > output by CGGES. The BETA(j) will be non-negative real. */ +/* > */ +/* > Note: the quotients ALPHA(j)/BETA(j) may easily over- or */ +/* > underflow, and BETA(j) may even be zero. Thus, the user */ +/* > should avoid naively computing the ratio alpha/beta. */ +/* > However, ALPHA will be always less than and usually */ +/* > comparable with norm(A) in magnitude, and BETA always less */ +/* > than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSL */ +/* > \verbatim */ +/* > VSL is COMPLEX array, dimension (LDVSL,N) */ +/* > If JOBVSL = 'V', VSL will contain the left Schur vectors. */ +/* > Not referenced if JOBVSL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSL */ +/* > \verbatim */ +/* > LDVSL is INTEGER */ +/* > The leading dimension of the matrix VSL. LDVSL >= 1, and */ +/* > if JOBVSL = 'V', LDVSL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSR */ +/* > \verbatim */ +/* > VSR is COMPLEX array, dimension (LDVSR,N) */ +/* > If JOBVSR = 'V', VSR will contain the right Schur vectors. */ +/* > Not referenced if JOBVSR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSR */ +/* > \verbatim */ +/* > LDVSR is INTEGER */ +/* > The leading dimension of the matrix VSR. LDVSR >= 1, and */ +/* > if JOBVSR = 'V', LDVSR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,2*N). */ +/* > For good performance, LWORK must generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (8*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BWORK */ +/* > \verbatim */ +/* > BWORK is LOGICAL array, dimension (N) */ +/* > Not referenced if SORT = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > =1,...,N: */ +/* > The QZ iteration failed. (A,B) are not in Schur */ +/* > form, but ALPHA(j) and BETA(j) should be correct for */ +/* > j=INFO+1,...,N. */ +/* > > N: =N+1: other than QZ iteration failed in CHGEQZ */ +/* > =N+2: after reordering, roundoff changed values of */ +/* > some complex eigenvalues so that leading */ +/* > eigenvalues in the Generalized Schur form no */ +/* > longer satisfy SELCTG=.TRUE. This could also */ +/* > be caused due to scaling. */ +/* > =N+3: reordering failed in CTGSEN. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int cgges_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, integer *n, complex *a, integer *lda, complex *b, integer * + ldb, integer *sdim, complex *alpha, complex *beta, complex *vsl, + integer *ldvsl, complex *vsr, integer *ldvsr, complex *work, integer * + lwork, real *rwork, logical *bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, + vsr_dim1, vsr_offset, i__1, i__2; + + /* Local variables */ + real anrm, bnrm; + integer idum[1], ierr, itau, iwrk; + real pvsl, pvsr; + integer i__; + extern logical lsame_(char *, char *); + integer ileft, icols; + logical cursl, ilvsl, ilvsr; + integer irwrk, irows; + extern /* Subroutine */ int cggbak_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, complex *, integer *, + integer *), cggbal_(char *, integer *, complex *, + integer *, complex *, integer *, integer *, integer *, real *, + real *, real *, integer *), slabad_(real *, real *); + extern real clange_(char *, integer *, integer *, complex *, integer *, + real *); + extern /* Subroutine */ int cgghrd_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *, integer *), + clascl_(char *, integer *, integer *, real *, real *, integer *, + integer *, complex *, integer *, integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *, integer *); + extern real slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *), claset_(char *, + integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + real bignum; + extern /* Subroutine */ int chgeqz_(char *, char *, char *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + complex *, complex *, complex *, integer *, complex *, integer *, + complex *, integer *, real *, integer *), + ctgsen_(integer *, logical *, logical *, logical *, integer *, + complex *, integer *, complex *, integer *, complex *, complex *, + complex *, integer *, complex *, integer *, integer *, real *, + real *, real *, complex *, integer *, integer *, integer *, + integer *); + integer ijobvl, iright, ijobvr; + real anrmto; + integer lwkmin; + logical lastsl; + real bnrmto; + extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *), + cunmqr_(char *, char *, integer *, integer *, integer *, complex + *, integer *, complex *, complex *, integer *, complex *, integer + *, integer *); + real smlnum; + logical wantst, lquery; + integer lwkopt; + real dif[2]; + integer ihi, ilo; + real eps; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alpha; + --beta; + vsl_dim1 = *ldvsl; + vsl_offset = 1 + vsl_dim1 * 1; + vsl -= vsl_offset; + vsr_dim1 = *ldvsr; + vsr_offset = 1 + vsr_dim1 * 1; + vsr -= vsr_offset; + --work; + --rwork; + --bwork; + + /* Function Body */ + if (lsame_(jobvsl, "N")) { + ijobvl = 1; + ilvsl = FALSE_; + } else if (lsame_(jobvsl, "V")) { + ijobvl = 2; + ilvsl = TRUE_; + } else { + ijobvl = -1; + ilvsl = FALSE_; + } + + if (lsame_(jobvsr, "N")) { + ijobvr = 1; + ilvsr = FALSE_; + } else if (lsame_(jobvsr, "V")) { + ijobvr = 2; + ilvsr = TRUE_; + } else { + ijobvr = -1; + ilvsr = FALSE_; + } + + wantst = lsame_(sort, "S"); + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (! wantst && ! lsame_(sort, "N")) { + *info = -3; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { + *info = -14; + } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { + *info = -16; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + lwkmin = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", n, &c__1, n, + &c__0, (ftnlen)6, (ftnlen)1); + lwkopt = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + *n * ilaenv_(&c__1, "CUNMQR", " ", n, & + c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = f2cmax(i__1,i__2); + if (ilvsl) { +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR", " ", n, & + c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = f2cmax(i__1,i__2); + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + + if (*lwork < lwkmin && ! lquery) { + *info = -18; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGGES ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = clange_("M", n, n, &a[a_offset], lda, &rwork[1]); + ilascl = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + + if (ilascl) { + clascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = clange_("M", n, n, &b[b_offset], ldb, &rwork[1]); + ilbscl = FALSE_; + if (bnrm > 0.f && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + + if (ilbscl) { + clascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ +/* (Real Workspace: need 6*N) */ + + ileft = 1; + iright = *n + 1; + irwrk = iright + *n; + cggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &rwork[ + ileft], &rwork[iright], &rwork[irwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ +/* (Complex Workspace: need N, prefer N*NB) */ + + irows = ihi + 1 - ilo; + icols = *n + 1 - ilo; + itau = 1; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + cgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to matrix A */ +/* (Complex Workspace: need N, prefer N*NB) */ + + i__1 = *lwork + 1 - iwrk; + cunmqr_("L", "C", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VSL */ +/* (Complex Workspace: need N, prefer N*NB) */ + + if (ilvsl) { + claset_("Full", n, n, &c_b1, &c_b2, &vsl[vsl_offset], ldvsl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + clacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[ + ilo + 1 + ilo * vsl_dim1], ldvsl); + } + i__1 = *lwork + 1 - iwrk; + cungqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, & + work[itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VSR */ + + if (ilvsr) { + claset_("Full", n, n, &c_b1, &c_b2, &vsr[vsr_offset], ldvsr); + } + +/* Reduce to generalized Hessenberg form */ +/* (Workspace: none needed) */ + + cgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &ierr); + + *sdim = 0; + +/* Perform QZ algorithm, computing Schur vectors if desired */ +/* (Complex Workspace: need N) */ +/* (Real Workspace: need N) */ + + iwrk = itau; + i__1 = *lwork + 1 - iwrk; + chgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, & + vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &rwork[irwrk], &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L30; + } + +/* Sort eigenvalues ALPHA/BETA if desired */ +/* (Workspace: none needed) */ + + if (wantst) { + +/* Undo scaling on eigenvalues before selecting */ + + if (ilascl) { + clascl_("G", &c__0, &c__0, &anrm, &anrmto, n, &c__1, &alpha[1], n, + &ierr); + } + if (ilbscl) { + clascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, &c__1, &beta[1], n, + &ierr); + } + +/* Select eigenvalues */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*selctg)(&alpha[i__], &beta[i__]); +/* L10: */ + } + + i__1 = *lwork - iwrk + 1; + ctgsen_(&c__0, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, + &vsr[vsr_offset], ldvsr, sdim, &pvsl, &pvsr, dif, &work[iwrk], + &i__1, idum, &c__1, &ierr); + if (ierr == 1) { + *info = *n + 3; + } + + } + +/* Apply back-permutation to VSL and VSR */ +/* (Workspace: none needed) */ + + if (ilvsl) { + cggbak_("P", "L", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, & + vsl[vsl_offset], ldvsl, &ierr); + } + if (ilvsr) { + cggbak_("P", "R", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, & + vsr[vsr_offset], ldvsr, &ierr); + } + +/* Undo scaling */ + + if (ilascl) { + clascl_("U", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, & + ierr); + clascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, & + ierr); + } + + if (ilbscl) { + clascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & + ierr); + clascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + if (wantst) { + +/* Check if reordering is correct */ + + lastsl = TRUE_; + *sdim = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*selctg)(&alpha[i__], &beta[i__]); + if (cursl) { + ++(*sdim); + } + if (cursl && ! lastsl) { + *info = *n + 2; + } + lastsl = cursl; +/* L20: */ + } + + } + +L30: + + work[1].r = (real) lwkopt, work[1].i = 0.f; + + return 0; + +/* End of CGGES */ + +} /* cgges_ */ + diff --git a/lapack-netlib/SRC/cgges3.c b/lapack-netlib/SRC/cgges3.c new file mode 100644 index 000000000..c83106ae1 --- /dev/null +++ b/lapack-netlib/SRC/cgges3.c @@ -0,0 +1,1078 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CGGES3 computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors +for GE matrices (blocked algorithm) */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGGES3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGGES3( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, */ +/* $ LDB, SDIM, ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, */ +/* $ WORK, LWORK, RWORK, BWORK, INFO ) */ + +/* CHARACTER JOBVSL, JOBVSR, SORT */ +/* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM */ +/* LOGICAL BWORK( * ) */ +/* REAL RWORK( * ) */ +/* COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), */ +/* $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), */ +/* $ WORK( * ) */ +/* LOGICAL SELCTG */ +/* EXTERNAL SELCTG */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGGES3 computes for a pair of N-by-N complex nonsymmetric matrices */ +/* > (A,B), the generalized eigenvalues, the generalized complex Schur */ +/* > form (S, T), and optionally left and/or right Schur vectors (VSL */ +/* > and VSR). This gives the generalized Schur factorization */ +/* > */ +/* > (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) */ +/* > */ +/* > where (VSR)**H is the conjugate-transpose of VSR. */ +/* > */ +/* > Optionally, it also orders the eigenvalues so that a selected cluster */ +/* > of eigenvalues appears in the leading diagonal blocks of the upper */ +/* > triangular matrix S and the upper triangular matrix T. The leading */ +/* > columns of VSL and VSR then form an unitary basis for the */ +/* > corresponding left and right eigenspaces (deflating subspaces). */ +/* > */ +/* > (If only the generalized eigenvalues are needed, use the driver */ +/* > CGGEV instead, which is faster.) */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar w */ +/* > or a ratio alpha/beta = w, such that A - w*B is singular. It is */ +/* > usually represented as the pair (alpha,beta), as there is a */ +/* > reasonable interpretation for beta=0, and even for both being zero. */ +/* > */ +/* > A pair of matrices (S,T) is in generalized complex Schur form if S */ +/* > and T are upper triangular and, in addition, the diagonal elements */ +/* > of T are non-negative real numbers. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVSL */ +/* > \verbatim */ +/* > JOBVSL is CHARACTER*1 */ +/* > = 'N': do not compute the left Schur vectors; */ +/* > = 'V': compute the left Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVSR */ +/* > \verbatim */ +/* > JOBVSR is CHARACTER*1 */ +/* > = 'N': do not compute the right Schur vectors; */ +/* > = 'V': compute the right Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SORT */ +/* > \verbatim */ +/* > SORT is CHARACTER*1 */ +/* > Specifies whether or not to order the eigenvalues on the */ +/* > diagonal of the generalized Schur form. */ +/* > = 'N': Eigenvalues are not ordered; */ +/* > = 'S': Eigenvalues are ordered (see SELCTG). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SELCTG */ +/* > \verbatim */ +/* > SELCTG is a LOGICAL FUNCTION of two COMPLEX arguments */ +/* > SELCTG must be declared EXTERNAL in the calling subroutine. */ +/* > If SORT = 'N', SELCTG is not referenced. */ +/* > If SORT = 'S', SELCTG is used to select eigenvalues to sort */ +/* > to the top left of the Schur form. */ +/* > An eigenvalue ALPHA(j)/BETA(j) is selected if */ +/* > SELCTG(ALPHA(j),BETA(j)) is true. */ +/* > */ +/* > Note that a selected complex eigenvalue may no longer satisfy */ +/* > SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since */ +/* > ordering may change the value of complex eigenvalues */ +/* > (especially if the eigenvalue is ill-conditioned), in this */ +/* > case INFO is set to N+2 (See INFO below). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VSL, and VSR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the first of the pair of matrices. */ +/* > On exit, A has been overwritten by its generalized Schur */ +/* > form S. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB, N) */ +/* > On entry, the second of the pair of matrices. */ +/* > On exit, B has been overwritten by its generalized Schur */ +/* > form T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SDIM */ +/* > \verbatim */ +/* > SDIM is INTEGER */ +/* > If SORT = 'N', SDIM = 0. */ +/* > If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* > for which SELCTG is true. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is COMPLEX array, dimension (N) */ +/* > On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the */ +/* > generalized eigenvalues. ALPHA(j), j=1,...,N and BETA(j), */ +/* > j=1,...,N are the diagonals of the complex Schur form (A,B) */ +/* > output by CGGES3. The BETA(j) will be non-negative real. */ +/* > */ +/* > Note: the quotients ALPHA(j)/BETA(j) may easily over- or */ +/* > underflow, and BETA(j) may even be zero. Thus, the user */ +/* > should avoid naively computing the ratio alpha/beta. */ +/* > However, ALPHA will be always less than and usually */ +/* > comparable with norm(A) in magnitude, and BETA always less */ +/* > than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSL */ +/* > \verbatim */ +/* > VSL is COMPLEX array, dimension (LDVSL,N) */ +/* > If JOBVSL = 'V', VSL will contain the left Schur vectors. */ +/* > Not referenced if JOBVSL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSL */ +/* > \verbatim */ +/* > LDVSL is INTEGER */ +/* > The leading dimension of the matrix VSL. LDVSL >= 1, and */ +/* > if JOBVSL = 'V', LDVSL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSR */ +/* > \verbatim */ +/* > VSR is COMPLEX array, dimension (LDVSR,N) */ +/* > If JOBVSR = 'V', VSR will contain the right Schur vectors. */ +/* > Not referenced if JOBVSR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSR */ +/* > \verbatim */ +/* > LDVSR is INTEGER */ +/* > The leading dimension of the matrix VSR. LDVSR >= 1, and */ +/* > if JOBVSR = 'V', LDVSR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (8*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BWORK */ +/* > \verbatim */ +/* > BWORK is LOGICAL array, dimension (N) */ +/* > Not referenced if SORT = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > =1,...,N: */ +/* > The QZ iteration failed. (A,B) are not in Schur */ +/* > form, but ALPHA(j) and BETA(j) should be correct for */ +/* > j=INFO+1,...,N. */ +/* > > N: =N+1: other than QZ iteration failed in CHGEQZ */ +/* > =N+2: after reordering, roundoff changed values of */ +/* > some complex eigenvalues so that leading */ +/* > eigenvalues in the Generalized Schur form no */ +/* > longer satisfy SELCTG=.TRUE. This could also */ +/* > be caused due to scaling. */ +/* > =N+3: reordering failed in CTGSEN. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date January 2015 */ + +/* > \ingroup complexGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int cgges3_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, integer *n, complex *a, integer *lda, complex *b, integer * + ldb, integer *sdim, complex *alpha, complex *beta, complex *vsl, + integer *ldvsl, complex *vsr, integer *ldvsr, complex *work, integer * + lwork, real *rwork, logical *bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, + vsr_dim1, vsr_offset, i__1, i__2; + complex q__1; + + /* Local variables */ + real anrm, bnrm; + integer idum[1], ierr, itau, iwrk; + real pvsl, pvsr; + integer i__; + extern logical lsame_(char *, char *); + integer ileft, icols; + logical cursl, ilvsl, ilvsr; + integer irwrk; + extern /* Subroutine */ int cgghd3_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, integer *); + integer irows; + extern /* Subroutine */ int cggbak_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, complex *, integer *, + integer *), cggbal_(char *, integer *, complex *, + integer *, complex *, integer *, integer *, integer *, real *, + real *, real *, integer *), slabad_(real *, real *); + extern real clange_(char *, integer *, integer *, complex *, integer *, + real *); + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *, integer *); + extern real slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *), claset_(char *, + integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *, ftnlen); + real bignum; + extern /* Subroutine */ int chgeqz_(char *, char *, char *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + complex *, complex *, complex *, integer *, complex *, integer *, + complex *, integer *, real *, integer *), + ctgsen_(integer *, logical *, logical *, logical *, integer *, + complex *, integer *, complex *, integer *, complex *, complex *, + complex *, integer *, complex *, integer *, integer *, real *, + real *, real *, complex *, integer *, integer *, integer *, + integer *); + integer ijobvl, iright, ijobvr; + real anrmto, bnrmto; + logical lastsl; + extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *), + cunmqr_(char *, char *, integer *, integer *, integer *, complex + *, integer *, complex *, complex *, integer *, complex *, integer + *, integer *); + real smlnum; + logical wantst, lquery; + integer lwkopt; + real dif[2]; + integer ihi, ilo; + real eps; + + +/* -- LAPACK driver routine (version 3.6.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* January 2015 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alpha; + --beta; + vsl_dim1 = *ldvsl; + vsl_offset = 1 + vsl_dim1 * 1; + vsl -= vsl_offset; + vsr_dim1 = *ldvsr; + vsr_offset = 1 + vsr_dim1 * 1; + vsr -= vsr_offset; + --work; + --rwork; + --bwork; + + /* Function Body */ + if (lsame_(jobvsl, "N")) { + ijobvl = 1; + ilvsl = FALSE_; + } else if (lsame_(jobvsl, "V")) { + ijobvl = 2; + ilvsl = TRUE_; + } else { + ijobvl = -1; + ilvsl = FALSE_; + } + + if (lsame_(jobvsr, "N")) { + ijobvr = 1; + ilvsr = FALSE_; + } else if (lsame_(jobvsr, "V")) { + ijobvr = 2; + ilvsr = TRUE_; + } else { + ijobvr = -1; + ilvsr = FALSE_; + } + + wantst = lsame_(sort, "S"); + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (! wantst && ! lsame_(sort, "N")) { + *info = -3; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { + *info = -14; + } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { + *info = -16; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -18; + } + } + +/* Compute workspace */ + + if (*info == 0) { + cgeqrf_(n, n, &b[b_offset], ldb, &work[1], &work[1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = 1, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + cunmqr_("L", "C", n, n, n, &b[b_offset], ldb, &work[1], &a[a_offset], + lda, &work[1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + if (ilvsl) { + cungqr_(n, n, n, &vsl[vsl_offset], ldvsl, &work[1], &work[1], & + c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + } + cgghd3_(jobvsl, jobvsr, n, &c__1, n, &a[a_offset], lda, &b[b_offset], + ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &work[ + 1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + chgeqz_("S", jobvsl, jobvsr, n, &c__1, n, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, + &vsr[vsr_offset], ldvsr, &work[1], &c_n1, &rwork[1], &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + if (wantst) { + ctgsen_(&c__0, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, & + b[b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], + ldvsl, &vsr[vsr_offset], ldvsr, sdim, &pvsl, &pvsr, dif, & + work[1], &c_n1, idum, &c__1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + } + q__1.r = (real) lwkopt, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGGES3 ", &i__1, (ftnlen)7); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = clange_("M", n, n, &a[a_offset], lda, &rwork[1]); + ilascl = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + + if (ilascl) { + clascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = clange_("M", n, n, &b[b_offset], ldb, &rwork[1]); + ilbscl = FALSE_; + if (bnrm > 0.f && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + + if (ilbscl) { + clascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ + + ileft = 1; + iright = *n + 1; + irwrk = iright + *n; + cggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &rwork[ + ileft], &rwork[iright], &rwork[irwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ + + irows = ihi + 1 - ilo; + icols = *n + 1 - ilo; + itau = 1; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + cgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to matrix A */ + + i__1 = *lwork + 1 - iwrk; + cunmqr_("L", "C", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VSL */ + + if (ilvsl) { + claset_("Full", n, n, &c_b1, &c_b2, &vsl[vsl_offset], ldvsl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + clacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[ + ilo + 1 + ilo * vsl_dim1], ldvsl); + } + i__1 = *lwork + 1 - iwrk; + cungqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, & + work[itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VSR */ + + if (ilvsr) { + claset_("Full", n, n, &c_b1, &c_b2, &vsr[vsr_offset], ldvsr); + } + +/* Reduce to generalized Hessenberg form */ + + i__1 = *lwork + 1 - iwrk; + cgghd3_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &work[iwrk] + , &i__1, &ierr); + + *sdim = 0; + +/* Perform QZ algorithm, computing Schur vectors if desired */ + + iwrk = itau; + i__1 = *lwork + 1 - iwrk; + chgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, & + vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &rwork[irwrk], &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L30; + } + +/* Sort eigenvalues ALPHA/BETA if desired */ + + if (wantst) { + +/* Undo scaling on eigenvalues before selecting */ + + if (ilascl) { + clascl_("G", &c__0, &c__0, &anrm, &anrmto, n, &c__1, &alpha[1], n, + &ierr); + } + if (ilbscl) { + clascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, &c__1, &beta[1], n, + &ierr); + } + +/* Select eigenvalues */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*selctg)(&alpha[i__], &beta[i__]); +/* L10: */ + } + + i__1 = *lwork - iwrk + 1; + ctgsen_(&c__0, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, + &vsr[vsr_offset], ldvsr, sdim, &pvsl, &pvsr, dif, &work[iwrk], + &i__1, idum, &c__1, &ierr); + if (ierr == 1) { + *info = *n + 3; + } + + } + +/* Apply back-permutation to VSL and VSR */ + + if (ilvsl) { + cggbak_("P", "L", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, & + vsl[vsl_offset], ldvsl, &ierr); + } + if (ilvsr) { + cggbak_("P", "R", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, & + vsr[vsr_offset], ldvsr, &ierr); + } + +/* Undo scaling */ + + if (ilascl) { + clascl_("U", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, & + ierr); + clascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, & + ierr); + } + + if (ilbscl) { + clascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & + ierr); + clascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + if (wantst) { + +/* Check if reordering is correct */ + + lastsl = TRUE_; + *sdim = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*selctg)(&alpha[i__], &beta[i__]); + if (cursl) { + ++(*sdim); + } + if (cursl && ! lastsl) { + *info = *n + 2; + } + lastsl = cursl; +/* L20: */ + } + + } + +L30: + + q__1.r = (real) lwkopt, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + + return 0; + +/* End of CGGES3 */ + +} /* cgges3_ */ + diff --git a/lapack-netlib/SRC/cggesx.c b/lapack-netlib/SRC/cggesx.c new file mode 100644 index 000000000..4e0426fb5 --- /dev/null +++ b/lapack-netlib/SRC/cggesx.c @@ -0,0 +1,1193 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CGGESX computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors +for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGGESX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA, */ +/* B, LDB, SDIM, ALPHA, BETA, VSL, LDVSL, VSR, */ +/* LDVSR, RCONDE, RCONDV, WORK, LWORK, RWORK, */ +/* IWORK, LIWORK, BWORK, INFO ) */ + +/* CHARACTER JOBVSL, JOBVSR, SENSE, SORT */ +/* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N, */ +/* $ SDIM */ +/* LOGICAL BWORK( * ) */ +/* INTEGER IWORK( * ) */ +/* REAL RCONDE( 2 ), RCONDV( 2 ), RWORK( * ) */ +/* COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), */ +/* $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), */ +/* $ WORK( * ) */ +/* LOGICAL SELCTG */ +/* EXTERNAL SELCTG */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGGESX computes for a pair of N-by-N complex nonsymmetric matrices */ +/* > (A,B), the generalized eigenvalues, the complex Schur form (S,T), */ +/* > and, optionally, the left and/or right matrices of Schur vectors (VSL */ +/* > and VSR). This gives the generalized Schur factorization */ +/* > */ +/* > (A,B) = ( (VSL) S (VSR)**H, (VSL) T (VSR)**H ) */ +/* > */ +/* > where (VSR)**H is the conjugate-transpose of VSR. */ +/* > */ +/* > Optionally, it also orders the eigenvalues so that a selected cluster */ +/* > of eigenvalues appears in the leading diagonal blocks of the upper */ +/* > triangular matrix S and the upper triangular matrix T; computes */ +/* > a reciprocal condition number for the average of the selected */ +/* > eigenvalues (RCONDE); and computes a reciprocal condition number for */ +/* > the right and left deflating subspaces corresponding to the selected */ +/* > eigenvalues (RCONDV). The leading columns of VSL and VSR then form */ +/* > an orthonormal basis for the corresponding left and right eigenspaces */ +/* > (deflating subspaces). */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar w */ +/* > or a ratio alpha/beta = w, such that A - w*B is singular. It is */ +/* > usually represented as the pair (alpha,beta), as there is a */ +/* > reasonable interpretation for beta=0 or for both being zero. */ +/* > */ +/* > A pair of matrices (S,T) is in generalized complex Schur form if T is */ +/* > upper triangular with non-negative diagonal and S is upper */ +/* > triangular. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVSL */ +/* > \verbatim */ +/* > JOBVSL is CHARACTER*1 */ +/* > = 'N': do not compute the left Schur vectors; */ +/* > = 'V': compute the left Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVSR */ +/* > \verbatim */ +/* > JOBVSR is CHARACTER*1 */ +/* > = 'N': do not compute the right Schur vectors; */ +/* > = 'V': compute the right Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SORT */ +/* > \verbatim */ +/* > SORT is CHARACTER*1 */ +/* > Specifies whether or not to order the eigenvalues on the */ +/* > diagonal of the generalized Schur form. */ +/* > = 'N': Eigenvalues are not ordered; */ +/* > = 'S': Eigenvalues are ordered (see SELCTG). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SELCTG */ +/* > \verbatim */ +/* > SELCTG is a LOGICAL FUNCTION of two COMPLEX arguments */ +/* > SELCTG must be declared EXTERNAL in the calling subroutine. */ +/* > If SORT = 'N', SELCTG is not referenced. */ +/* > If SORT = 'S', SELCTG is used to select eigenvalues to sort */ +/* > to the top left of the Schur form. */ +/* > Note that a selected complex eigenvalue may no longer satisfy */ +/* > SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since */ +/* > ordering may change the value of complex eigenvalues */ +/* > (especially if the eigenvalue is ill-conditioned), in this */ +/* > case INFO is set to N+3 see INFO below). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SENSE */ +/* > \verbatim */ +/* > SENSE is CHARACTER*1 */ +/* > Determines which reciprocal condition numbers are computed. */ +/* > = 'N': None are computed; */ +/* > = 'E': Computed for average of selected eigenvalues only; */ +/* > = 'V': Computed for selected deflating subspaces only; */ +/* > = 'B': Computed for both. */ +/* > If SENSE = 'E', 'V', or 'B', SORT must equal 'S'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VSL, and VSR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the first of the pair of matrices. */ +/* > On exit, A has been overwritten by its generalized Schur */ +/* > form S. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB, N) */ +/* > On entry, the second of the pair of matrices. */ +/* > On exit, B has been overwritten by its generalized Schur */ +/* > form T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SDIM */ +/* > \verbatim */ +/* > SDIM is INTEGER */ +/* > If SORT = 'N', SDIM = 0. */ +/* > If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* > for which SELCTG is true. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is COMPLEX array, dimension (N) */ +/* > On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the */ +/* > generalized eigenvalues. ALPHA(j) and BETA(j),j=1,...,N are */ +/* > the diagonals of the complex Schur form (S,T). BETA(j) will */ +/* > be non-negative real. */ +/* > */ +/* > Note: the quotients ALPHA(j)/BETA(j) may easily over- or */ +/* > underflow, and BETA(j) may even be zero. Thus, the user */ +/* > should avoid naively computing the ratio alpha/beta. */ +/* > However, ALPHA will be always less than and usually */ +/* > comparable with norm(A) in magnitude, and BETA always less */ +/* > than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSL */ +/* > \verbatim */ +/* > VSL is COMPLEX array, dimension (LDVSL,N) */ +/* > If JOBVSL = 'V', VSL will contain the left Schur vectors. */ +/* > Not referenced if JOBVSL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSL */ +/* > \verbatim */ +/* > LDVSL is INTEGER */ +/* > The leading dimension of the matrix VSL. LDVSL >=1, and */ +/* > if JOBVSL = 'V', LDVSL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSR */ +/* > \verbatim */ +/* > VSR is COMPLEX array, dimension (LDVSR,N) */ +/* > If JOBVSR = 'V', VSR will contain the right Schur vectors. */ +/* > Not referenced if JOBVSR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSR */ +/* > \verbatim */ +/* > LDVSR is INTEGER */ +/* > The leading dimension of the matrix VSR. LDVSR >= 1, and */ +/* > if JOBVSR = 'V', LDVSR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDE */ +/* > \verbatim */ +/* > RCONDE is REAL array, dimension ( 2 ) */ +/* > If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the */ +/* > reciprocal condition numbers for the average of the selected */ +/* > eigenvalues. */ +/* > Not referenced if SENSE = 'N' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDV */ +/* > \verbatim */ +/* > RCONDV is REAL array, dimension ( 2 ) */ +/* > If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the */ +/* > reciprocal condition number for the selected deflating */ +/* > subspaces. */ +/* > Not referenced if SENSE = 'N' or 'E'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If N = 0, LWORK >= 1, else if SENSE = 'E', 'V', or 'B', */ +/* > LWORK >= MAX(1,2*N,2*SDIM*(N-SDIM)), else */ +/* > LWORK >= MAX(1,2*N). Note that 2*SDIM*(N-SDIM) <= N*N/2. */ +/* > Note also that an error is only returned if */ +/* > LWORK < MAX(1,2*N), but if SENSE = 'E' or 'V' or 'B' this may */ +/* > not be large enough. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the bound on the optimal size of the WORK */ +/* > array and the minimum size of the IWORK array, returns these */ +/* > values as the first entries of the WORK and IWORK arrays, and */ +/* > no error message related to LWORK or LIWORK is issued by */ +/* > XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension ( 8*N ) */ +/* > Real workspace. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If SENSE = 'N' or N = 0, LIWORK >= 1, otherwise */ +/* > LIWORK >= N+2. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the bound on the optimal size of the */ +/* > WORK array and the minimum size of the IWORK array, returns */ +/* > these values as the first entries of the WORK and IWORK */ +/* > arrays, and no error message related to LWORK or LIWORK is */ +/* > issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BWORK */ +/* > \verbatim */ +/* > BWORK is LOGICAL array, dimension (N) */ +/* > Not referenced if SORT = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > = 1,...,N: */ +/* > The QZ iteration failed. (A,B) are not in Schur */ +/* > form, but ALPHA(j) and BETA(j) should be correct for */ +/* > j=INFO+1,...,N. */ +/* > > N: =N+1: other than QZ iteration failed in CHGEQZ */ +/* > =N+2: after reordering, roundoff changed values of */ +/* > some complex eigenvalues so that leading */ +/* > eigenvalues in the Generalized Schur form no */ +/* > longer satisfy SELCTG=.TRUE. This could also */ +/* > be caused due to scaling. */ +/* > =N+3: reordering failed in CTGSEN. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup complexGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int cggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, char *sense, integer *n, complex *a, integer *lda, complex *b, + integer *ldb, integer *sdim, complex *alpha, complex *beta, complex * + vsl, integer *ldvsl, complex *vsr, integer *ldvsr, real *rconde, real + *rcondv, complex *work, integer *lwork, real *rwork, integer *iwork, + integer *liwork, logical *bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, + vsr_dim1, vsr_offset, i__1, i__2; + + /* Local variables */ + integer ijob; + real anrm, bnrm; + integer ierr, itau, iwrk, lwrk, i__; + extern logical lsame_(char *, char *); + integer ileft, icols; + logical cursl, ilvsl, ilvsr; + integer irwrk, irows; + extern /* Subroutine */ int cggbak_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, complex *, integer *, + integer *), cggbal_(char *, integer *, complex *, + integer *, complex *, integer *, integer *, integer *, real *, + real *, real *, integer *), slabad_(real *, real *); + extern real clange_(char *, integer *, integer *, complex *, integer *, + real *); + real pl; + extern /* Subroutine */ int cgghrd_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *, integer *), + clascl_(char *, integer *, integer *, real *, real *, integer *, + integer *, complex *, integer *, integer *); + real pr; + logical ilascl, ilbscl; + extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *, integer *), clacpy_( + char *, integer *, integer *, complex *, integer *, complex *, + integer *), claset_(char *, integer *, integer *, complex + *, complex *, complex *, integer *), xerbla_(char *, + integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern real slamch_(char *); + real bignum; + extern /* Subroutine */ int chgeqz_(char *, char *, char *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + complex *, complex *, complex *, integer *, complex *, integer *, + complex *, integer *, real *, integer *), + ctgsen_(integer *, logical *, logical *, logical *, integer *, + complex *, integer *, complex *, integer *, complex *, complex *, + complex *, integer *, complex *, integer *, integer *, real *, + real *, real *, complex *, integer *, integer *, integer *, + integer *); + integer ijobvl, iright, ijobvr; + logical wantsb; + integer liwmin; + logical wantse, lastsl; + real anrmto, bnrmto; + extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *); + integer minwrk, maxwrk; + logical wantsn; + real smlnum; + extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + logical wantst, lquery, wantsv; + real dif[2]; + integer ihi, ilo; + real eps; + + +/* -- LAPACK driver routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alpha; + --beta; + vsl_dim1 = *ldvsl; + vsl_offset = 1 + vsl_dim1 * 1; + vsl -= vsl_offset; + vsr_dim1 = *ldvsr; + vsr_offset = 1 + vsr_dim1 * 1; + vsr -= vsr_offset; + --rconde; + --rcondv; + --work; + --rwork; + --iwork; + --bwork; + + /* Function Body */ + if (lsame_(jobvsl, "N")) { + ijobvl = 1; + ilvsl = FALSE_; + } else if (lsame_(jobvsl, "V")) { + ijobvl = 2; + ilvsl = TRUE_; + } else { + ijobvl = -1; + ilvsl = FALSE_; + } + + if (lsame_(jobvsr, "N")) { + ijobvr = 1; + ilvsr = FALSE_; + } else if (lsame_(jobvsr, "V")) { + ijobvr = 2; + ilvsr = TRUE_; + } else { + ijobvr = -1; + ilvsr = FALSE_; + } + + wantst = lsame_(sort, "S"); + wantsn = lsame_(sense, "N"); + wantse = lsame_(sense, "E"); + wantsv = lsame_(sense, "V"); + wantsb = lsame_(sense, "B"); + lquery = *lwork == -1 || *liwork == -1; + if (wantsn) { + ijob = 0; + } else if (wantse) { + ijob = 1; + } else if (wantsv) { + ijob = 2; + } else if (wantsb) { + ijob = 4; + } + +/* Test the input arguments */ + + *info = 0; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (! wantst && ! lsame_(sort, "N")) { + *info = -3; + } else if (! (wantsn || wantse || wantsv || wantsb) || ! wantst && ! + wantsn) { + *info = -5; + } else if (*n < 0) { + *info = -6; + } else if (*lda < f2cmax(1,*n)) { + *info = -8; + } else if (*ldb < f2cmax(1,*n)) { + *info = -10; + } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { + *info = -15; + } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { + *info = -17; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + if (*n > 0) { + minwrk = *n << 1; + maxwrk = *n * (ilaenv_(&c__1, "CGEQRF", " ", n, &c__1, n, &c__0, ( + ftnlen)6, (ftnlen)1) + 1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * (ilaenv_(&c__1, "CUNMQR", " ", n, & + c__1, n, &c_n1, (ftnlen)6, (ftnlen)1) + 1); + maxwrk = f2cmax(i__1,i__2); + if (ilvsl) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * (ilaenv_(&c__1, "CUNGQR", " ", n, & + c__1, n, &c_n1, (ftnlen)6, (ftnlen)1) + 1); + maxwrk = f2cmax(i__1,i__2); + } + lwrk = maxwrk; + if (ijob >= 1) { +/* Computing MAX */ + i__1 = lwrk, i__2 = *n * *n / 2; + lwrk = f2cmax(i__1,i__2); + } + } else { + minwrk = 1; + maxwrk = 1; + lwrk = 1; + } + work[1].r = (real) lwrk, work[1].i = 0.f; + if (wantsn || *n == 0) { + liwmin = 1; + } else { + liwmin = *n + 2; + } + iwork[1] = liwmin; + + if (*lwork < minwrk && ! lquery) { + *info = -21; + } else if (*liwork < liwmin && ! lquery) { + *info = -24; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGGESX", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = clange_("M", n, n, &a[a_offset], lda, &rwork[1]); + ilascl = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + clascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = clange_("M", n, n, &b[b_offset], ldb, &rwork[1]); + ilbscl = FALSE_; + if (bnrm > 0.f && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + clascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ +/* (Real Workspace: need 6*N) */ + + ileft = 1; + iright = *n + 1; + irwrk = iright + *n; + cggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &rwork[ + ileft], &rwork[iright], &rwork[irwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ +/* (Complex Workspace: need N, prefer N*NB) */ + + irows = ihi + 1 - ilo; + icols = *n + 1 - ilo; + itau = 1; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + cgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the unitary transformation to matrix A */ +/* (Complex Workspace: need N, prefer N*NB) */ + + i__1 = *lwork + 1 - iwrk; + cunmqr_("L", "C", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VSL */ +/* (Complex Workspace: need N, prefer N*NB) */ + + if (ilvsl) { + claset_("Full", n, n, &c_b1, &c_b2, &vsl[vsl_offset], ldvsl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + clacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[ + ilo + 1 + ilo * vsl_dim1], ldvsl); + } + i__1 = *lwork + 1 - iwrk; + cungqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, & + work[itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VSR */ + + if (ilvsr) { + claset_("Full", n, n, &c_b1, &c_b2, &vsr[vsr_offset], ldvsr); + } + +/* Reduce to generalized Hessenberg form */ +/* (Workspace: none needed) */ + + cgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &ierr); + + *sdim = 0; + +/* Perform QZ algorithm, computing Schur vectors if desired */ +/* (Complex Workspace: need N) */ +/* (Real Workspace: need N) */ + + iwrk = itau; + i__1 = *lwork + 1 - iwrk; + chgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, & + vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &rwork[irwrk], &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L40; + } + +/* Sort eigenvalues ALPHA/BETA and compute the reciprocal of */ +/* condition number(s) */ + + if (wantst) { + +/* Undo scaling on eigenvalues before SELCTGing */ + + if (ilascl) { + clascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, + &ierr); + } + if (ilbscl) { + clascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, + &ierr); + } + +/* Select eigenvalues */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*selctg)(&alpha[i__], &beta[i__]); +/* L10: */ + } + +/* Reorder eigenvalues, transform Generalized Schur vectors, and */ +/* compute reciprocal condition numbers */ +/* (Complex Workspace: If IJOB >= 1, need MAX(1, 2*SDIM*(N-SDIM)) */ +/* otherwise, need 1 ) */ + + i__1 = *lwork - iwrk + 1; + ctgsen_(&ijob, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, + &vsr[vsr_offset], ldvsr, sdim, &pl, &pr, dif, &work[iwrk], & + i__1, &iwork[1], liwork, &ierr); + + if (ijob >= 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*sdim << 1) * (*n - *sdim); + maxwrk = f2cmax(i__1,i__2); + } + if (ierr == -21) { + +/* not enough complex workspace */ + + *info = -21; + } else { + if (ijob == 1 || ijob == 4) { + rconde[1] = pl; + rconde[2] = pr; + } + if (ijob == 2 || ijob == 4) { + rcondv[1] = dif[0]; + rcondv[2] = dif[1]; + } + if (ierr == 1) { + *info = *n + 3; + } + } + + } + +/* Apply permutation to VSL and VSR */ +/* (Workspace: none needed) */ + + if (ilvsl) { + cggbak_("P", "L", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, & + vsl[vsl_offset], ldvsl, &ierr); + } + + if (ilvsr) { + cggbak_("P", "R", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, & + vsr[vsr_offset], ldvsr, &ierr); + } + +/* Undo scaling */ + + if (ilascl) { + clascl_("U", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, & + ierr); + clascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, & + ierr); + } + + if (ilbscl) { + clascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & + ierr); + clascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + if (wantst) { + +/* Check if reordering is correct */ + + lastsl = TRUE_; + *sdim = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*selctg)(&alpha[i__], &beta[i__]); + if (cursl) { + ++(*sdim); + } + if (cursl && ! lastsl) { + *info = *n + 2; + } + lastsl = cursl; +/* L30: */ + } + + } + +L40: + + work[1].r = (real) maxwrk, work[1].i = 0.f; + iwork[1] = liwmin; + + return 0; + +/* End of CGGESX */ + +} /* cggesx_ */ + diff --git a/lapack-netlib/SRC/cggev.c b/lapack-netlib/SRC/cggev.c new file mode 100644 index 000000000..46312d940 --- /dev/null +++ b/lapack-netlib/SRC/cggev.c @@ -0,0 +1,1048 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CGGEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matr +ices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGGEV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, */ +/* VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) */ + +/* CHARACTER JOBVL, JOBVR */ +/* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N */ +/* REAL RWORK( * ) */ +/* COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), */ +/* $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), */ +/* $ WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGGEV computes for a pair of N-by-N complex nonsymmetric matrices */ +/* > (A,B), the generalized eigenvalues, and optionally, the left and/or */ +/* > right generalized eigenvectors. */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar */ +/* > lambda or a ratio alpha/beta = lambda, such that A - lambda*B is */ +/* > singular. It is usually represented as the pair (alpha,beta), as */ +/* > there is a reasonable interpretation for beta=0, and even for both */ +/* > being zero. */ +/* > */ +/* > The right generalized eigenvector v(j) corresponding to the */ +/* > generalized eigenvalue lambda(j) of (A,B) satisfies */ +/* > */ +/* > A * v(j) = lambda(j) * B * v(j). */ +/* > */ +/* > The left generalized eigenvector u(j) corresponding to the */ +/* > generalized eigenvalues lambda(j) of (A,B) satisfies */ +/* > */ +/* > u(j)**H * A = lambda(j) * u(j)**H * B */ +/* > */ +/* > where u(j)**H is the conjugate-transpose of u(j). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVL */ +/* > \verbatim */ +/* > JOBVL is CHARACTER*1 */ +/* > = 'N': do not compute the left generalized eigenvectors; */ +/* > = 'V': compute the left generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVR */ +/* > \verbatim */ +/* > JOBVR is CHARACTER*1 */ +/* > = 'N': do not compute the right generalized eigenvectors; */ +/* > = 'V': compute the right generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VL, and VR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the matrix A in the pair (A,B). */ +/* > On exit, A has been overwritten. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB, N) */ +/* > On entry, the matrix B in the pair (A,B). */ +/* > On exit, B has been overwritten. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is COMPLEX array, dimension (N) */ +/* > On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the */ +/* > generalized eigenvalues. */ +/* > */ +/* > Note: the quotients ALPHA(j)/BETA(j) may easily over- or */ +/* > underflow, and BETA(j) may even be zero. Thus, the user */ +/* > should avoid naively computing the ratio alpha/beta. */ +/* > However, ALPHA will be always less than and usually */ +/* > comparable with norm(A) in magnitude, and BETA always less */ +/* > than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VL */ +/* > \verbatim */ +/* > VL is COMPLEX array, dimension (LDVL,N) */ +/* > If JOBVL = 'V', the left generalized eigenvectors u(j) are */ +/* > stored one after another in the columns of VL, in the same */ +/* > order as their eigenvalues. */ +/* > Each eigenvector is scaled so the largest component has */ +/* > abs(real part) + abs(imag. part) = 1. */ +/* > Not referenced if JOBVL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVL */ +/* > \verbatim */ +/* > LDVL is INTEGER */ +/* > The leading dimension of the matrix VL. LDVL >= 1, and */ +/* > if JOBVL = 'V', LDVL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VR */ +/* > \verbatim */ +/* > VR is COMPLEX array, dimension (LDVR,N) */ +/* > If JOBVR = 'V', the right generalized eigenvectors v(j) are */ +/* > stored one after another in the columns of VR, in the same */ +/* > order as their eigenvalues. */ +/* > Each eigenvector is scaled so the largest component has */ +/* > abs(real part) + abs(imag. part) = 1. */ +/* > Not referenced if JOBVR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVR */ +/* > \verbatim */ +/* > LDVR is INTEGER */ +/* > The leading dimension of the matrix VR. LDVR >= 1, and */ +/* > if JOBVR = 'V', LDVR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,2*N). */ +/* > For good performance, LWORK must generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (8*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > =1,...,N: */ +/* > The QZ iteration failed. No eigenvectors have been */ +/* > calculated, but ALPHA(j) and BETA(j) should be */ +/* > correct for j=INFO+1,...,N. */ +/* > > N: =N+1: other then QZ iteration failed in SHGEQZ, */ +/* > =N+2: error return from STGEVC. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup complexGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int cggev_(char *jobvl, char *jobvr, integer *n, complex *a, + integer *lda, complex *b, integer *ldb, complex *alpha, complex *beta, + complex *vl, integer *ldvl, complex *vr, integer *ldvr, complex * + work, integer *lwork, real *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, + vr_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3, r__4; + complex q__1; + + /* Local variables */ + real anrm, bnrm; + integer ierr, itau; + real temp; + logical ilvl, ilvr; + integer iwrk; + extern logical lsame_(char *, char *); + integer ileft, icols, irwrk, irows, jc; + extern /* Subroutine */ int cggbak_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, complex *, integer *, + integer *), cggbal_(char *, integer *, complex *, + integer *, complex *, integer *, integer *, integer *, real *, + real *, real *, integer *), slabad_(real *, real *); + integer in; + extern real clange_(char *, integer *, integer *, complex *, integer *, + real *); + integer jr; + extern /* Subroutine */ int cgghrd_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *, integer *), + clascl_(char *, integer *, integer *, real *, real *, integer *, + integer *, complex *, integer *, integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *, integer *), clacpy_( + char *, integer *, integer *, complex *, integer *, complex *, + integer *), claset_(char *, integer *, integer *, complex + *, complex *, complex *, integer *), ctgevc_(char *, char + *, logical *, integer *, complex *, integer *, complex *, integer + *, complex *, integer *, complex *, integer *, integer *, integer + *, complex *, real *, integer *), xerbla_(char *, + integer *, ftnlen); + logical ldumma[1]; + char chtemp[1]; + real bignum; + extern /* Subroutine */ int chgeqz_(char *, char *, char *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + complex *, complex *, complex *, integer *, complex *, integer *, + complex *, integer *, real *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern real slamch_(char *); + integer ijobvl, iright, ijobvr; + extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *); + real anrmto; + integer lwkmin; + real bnrmto; + extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + real smlnum; + integer lwkopt; + logical lquery; + integer ihi, ilo; + real eps; + logical ilv; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alpha; + --beta; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1 * 1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1 * 1; + vr -= vr_offset; + --work; + --rwork; + + /* Function Body */ + if (lsame_(jobvl, "N")) { + ijobvl = 1; + ilvl = FALSE_; + } else if (lsame_(jobvl, "V")) { + ijobvl = 2; + ilvl = TRUE_; + } else { + ijobvl = -1; + ilvl = FALSE_; + } + + if (lsame_(jobvr, "N")) { + ijobvr = 1; + ilvr = FALSE_; + } else if (lsame_(jobvr, "V")) { + ijobvr = 2; + ilvr = TRUE_; + } else { + ijobvr = -1; + ilvr = FALSE_; + } + ilv = ilvl || ilvr; + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } else if (*ldvl < 1 || ilvl && *ldvl < *n) { + *info = -11; + } else if (*ldvr < 1 || ilvr && *ldvr < *n) { + *info = -13; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. The workspace is */ +/* computed assuming ILO = 1 and IHI = N, the worst case.) */ + + if (*info == 0) { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + lwkmin = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", n, &c__1, n, + &c__0, (ftnlen)6, (ftnlen)1); + lwkopt = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + *n * ilaenv_(&c__1, "CUNMQR", " ", n, & + c__1, n, &c__0, (ftnlen)6, (ftnlen)1); + lwkopt = f2cmax(i__1,i__2); + if (ilvl) { +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR", " ", n, & + c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = f2cmax(i__1,i__2); + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + + if (*lwork < lwkmin && ! lquery) { + *info = -15; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGGEV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = slamch_("E") * slamch_("B"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = clange_("M", n, n, &a[a_offset], lda, &rwork[1]); + ilascl = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + clascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = clange_("M", n, n, &b[b_offset], ldb, &rwork[1]); + ilbscl = FALSE_; + if (bnrm > 0.f && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + clascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrices A, B to isolate eigenvalues if possible */ +/* (Real Workspace: need 6*N) */ + + ileft = 1; + iright = *n + 1; + irwrk = iright + *n; + cggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &rwork[ + ileft], &rwork[iright], &rwork[irwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ +/* (Complex Workspace: need N, prefer N*NB) */ + + irows = ihi + 1 - ilo; + if (ilv) { + icols = *n + 1 - ilo; + } else { + icols = irows; + } + itau = 1; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + cgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to matrix A */ +/* (Complex Workspace: need N, prefer N*NB) */ + + i__1 = *lwork + 1 - iwrk; + cunmqr_("L", "C", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VL */ +/* (Complex Workspace: need N, prefer N*NB) */ + + if (ilvl) { + claset_("Full", n, n, &c_b1, &c_b2, &vl[vl_offset], ldvl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + clacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vl[ + ilo + 1 + ilo * vl_dim1], ldvl); + } + i__1 = *lwork + 1 - iwrk; + cungqr_(&irows, &irows, &irows, &vl[ilo + ilo * vl_dim1], ldvl, &work[ + itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VR */ + + if (ilvr) { + claset_("Full", n, n, &c_b1, &c_b2, &vr[vr_offset], ldvr); + } + +/* Reduce to generalized Hessenberg form */ + + if (ilv) { + +/* Eigenvectors requested -- work on whole matrix. */ + + cgghrd_(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr); + } else { + cgghrd_("N", "N", &irows, &c__1, &irows, &a[ilo + ilo * a_dim1], lda, + &b[ilo + ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[ + vr_offset], ldvr, &ierr); + } + +/* Perform QZ algorithm (Compute eigenvalues, and optionally, the */ +/* Schur form and Schur vectors) */ +/* (Complex Workspace: need N) */ +/* (Real Workspace: need N) */ + + iwrk = itau; + if (ilv) { + *(unsigned char *)chtemp = 'S'; + } else { + *(unsigned char *)chtemp = 'E'; + } + i__1 = *lwork + 1 - iwrk; + chgeqz_(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vl[vl_offset], ldvl, &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, &rwork[irwrk], &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L70; + } + +/* Compute Eigenvectors */ +/* (Real Workspace: need 2*N) */ +/* (Complex Workspace: need 2*N) */ + + if (ilv) { + if (ilvl) { + if (ilvr) { + *(unsigned char *)chtemp = 'B'; + } else { + *(unsigned char *)chtemp = 'L'; + } + } else { + *(unsigned char *)chtemp = 'R'; + } + + ctgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, + &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[ + iwrk], &rwork[irwrk], &ierr); + if (ierr != 0) { + *info = *n + 2; + goto L70; + } + +/* Undo balancing on VL and VR and normalization */ +/* (Workspace: none needed) */ + + if (ilvl) { + cggbak_("P", "L", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, + &vl[vl_offset], ldvl, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + temp = 0.f; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + i__3 = jr + jc * vl_dim1; + r__3 = temp, r__4 = (r__1 = vl[i__3].r, abs(r__1)) + ( + r__2 = r_imag(&vl[jr + jc * vl_dim1]), abs(r__2)); + temp = f2cmax(r__3,r__4); +/* L10: */ + } + if (temp < smlnum) { + goto L30; + } + temp = 1.f / temp; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + i__3 = jr + jc * vl_dim1; + i__4 = jr + jc * vl_dim1; + q__1.r = temp * vl[i__4].r, q__1.i = temp * vl[i__4].i; + vl[i__3].r = q__1.r, vl[i__3].i = q__1.i; +/* L20: */ + } +L30: + ; + } + } + if (ilvr) { + cggbak_("P", "R", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, + &vr[vr_offset], ldvr, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + temp = 0.f; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + i__3 = jr + jc * vr_dim1; + r__3 = temp, r__4 = (r__1 = vr[i__3].r, abs(r__1)) + ( + r__2 = r_imag(&vr[jr + jc * vr_dim1]), abs(r__2)); + temp = f2cmax(r__3,r__4); +/* L40: */ + } + if (temp < smlnum) { + goto L60; + } + temp = 1.f / temp; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + i__3 = jr + jc * vr_dim1; + i__4 = jr + jc * vr_dim1; + q__1.r = temp * vr[i__4].r, q__1.i = temp * vr[i__4].i; + vr[i__3].r = q__1.r, vr[i__3].i = q__1.i; +/* L50: */ + } +L60: + ; + } + } + } + +/* Undo scaling if necessary */ + +L70: + + if (ilascl) { + clascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, & + ierr); + } + + if (ilbscl) { + clascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CGGEV */ + +} /* cggev_ */ + diff --git a/lapack-netlib/SRC/cggev3.c b/lapack-netlib/SRC/cggev3.c new file mode 100644 index 000000000..da8e7ece9 --- /dev/null +++ b/lapack-netlib/SRC/cggev3.c @@ -0,0 +1,1062 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CGGEV3 computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE mat +rices (blocked algorithm) */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGGEV3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, */ +/* $ VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) */ + +/* CHARACTER JOBVL, JOBVR */ +/* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N */ +/* REAL RWORK( * ) */ +/* COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), */ +/* $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), */ +/* $ WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGGEV3 computes for a pair of N-by-N complex nonsymmetric matrices */ +/* > (A,B), the generalized eigenvalues, and optionally, the left and/or */ +/* > right generalized eigenvectors. */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar */ +/* > lambda or a ratio alpha/beta = lambda, such that A - lambda*B is */ +/* > singular. It is usually represented as the pair (alpha,beta), as */ +/* > there is a reasonable interpretation for beta=0, and even for both */ +/* > being zero. */ +/* > */ +/* > The right generalized eigenvector v(j) corresponding to the */ +/* > generalized eigenvalue lambda(j) of (A,B) satisfies */ +/* > */ +/* > A * v(j) = lambda(j) * B * v(j). */ +/* > */ +/* > The left generalized eigenvector u(j) corresponding to the */ +/* > generalized eigenvalues lambda(j) of (A,B) satisfies */ +/* > */ +/* > u(j)**H * A = lambda(j) * u(j)**H * B */ +/* > */ +/* > where u(j)**H is the conjugate-transpose of u(j). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVL */ +/* > \verbatim */ +/* > JOBVL is CHARACTER*1 */ +/* > = 'N': do not compute the left generalized eigenvectors; */ +/* > = 'V': compute the left generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVR */ +/* > \verbatim */ +/* > JOBVR is CHARACTER*1 */ +/* > = 'N': do not compute the right generalized eigenvectors; */ +/* > = 'V': compute the right generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VL, and VR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the matrix A in the pair (A,B). */ +/* > On exit, A has been overwritten. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB, N) */ +/* > On entry, the matrix B in the pair (A,B). */ +/* > On exit, B has been overwritten. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is COMPLEX array, dimension (N) */ +/* > On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the */ +/* > generalized eigenvalues. */ +/* > */ +/* > Note: the quotients ALPHA(j)/BETA(j) may easily over- or */ +/* > underflow, and BETA(j) may even be zero. Thus, the user */ +/* > should avoid naively computing the ratio alpha/beta. */ +/* > However, ALPHA will be always less than and usually */ +/* > comparable with norm(A) in magnitude, and BETA always less */ +/* > than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VL */ +/* > \verbatim */ +/* > VL is COMPLEX array, dimension (LDVL,N) */ +/* > If JOBVL = 'V', the left generalized eigenvectors u(j) are */ +/* > stored one after another in the columns of VL, in the same */ +/* > order as their eigenvalues. */ +/* > Each eigenvector is scaled so the largest component has */ +/* > abs(real part) + abs(imag. part) = 1. */ +/* > Not referenced if JOBVL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVL */ +/* > \verbatim */ +/* > LDVL is INTEGER */ +/* > The leading dimension of the matrix VL. LDVL >= 1, and */ +/* > if JOBVL = 'V', LDVL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VR */ +/* > \verbatim */ +/* > VR is COMPLEX array, dimension (LDVR,N) */ +/* > If JOBVR = 'V', the right generalized eigenvectors v(j) are */ +/* > stored one after another in the columns of VR, in the same */ +/* > order as their eigenvalues. */ +/* > Each eigenvector is scaled so the largest component has */ +/* > abs(real part) + abs(imag. part) = 1. */ +/* > Not referenced if JOBVR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVR */ +/* > \verbatim */ +/* > LDVR is INTEGER */ +/* > The leading dimension of the matrix VR. LDVR >= 1, and */ +/* > if JOBVR = 'V', LDVR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (8*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > =1,...,N: */ +/* > The QZ iteration failed. No eigenvectors have been */ +/* > calculated, but ALPHA(j) and BETA(j) should be */ +/* > correct for j=INFO+1,...,N. */ +/* > > N: =N+1: other then QZ iteration failed in SHGEQZ, */ +/* > =N+2: error return from STGEVC. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date January 2015 */ + +/* > \ingroup complexGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int cggev3_(char *jobvl, char *jobvr, integer *n, complex *a, + integer *lda, complex *b, integer *ldb, complex *alpha, complex * + beta, complex *vl, integer *ldvl, complex *vr, integer *ldvr, complex + *work, integer *lwork, real *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, + vr_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3, r__4; + complex q__1; + + /* Local variables */ + real anrm, bnrm; + integer ierr, itau; + real temp; + logical ilvl, ilvr; + integer iwrk; + extern logical lsame_(char *, char *); + integer ileft, icols, irwrk; + extern /* Subroutine */ int cgghd3_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, integer *); + integer irows, jc; + extern /* Subroutine */ int cggbak_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, complex *, integer *, + integer *), cggbal_(char *, integer *, complex *, + integer *, complex *, integer *, integer *, integer *, real *, + real *, real *, integer *), slabad_(real *, real *); + integer in; + extern real clange_(char *, integer *, integer *, complex *, integer *, + real *); + integer jr; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *, integer *), clacpy_( + char *, integer *, integer *, complex *, integer *, complex *, + integer *), claset_(char *, integer *, integer *, complex + *, complex *, complex *, integer *), ctgevc_(char *, char + *, logical *, integer *, complex *, integer *, complex *, integer + *, complex *, integer *, complex *, integer *, integer *, integer + *, complex *, real *, integer *), xerbla_(char *, + integer *, ftnlen); + logical ldumma[1]; + char chtemp[1]; + real bignum; + extern /* Subroutine */ int chgeqz_(char *, char *, char *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + complex *, complex *, complex *, integer *, complex *, integer *, + complex *, integer *, real *, integer *); + extern real slamch_(char *); + integer ijobvl, iright, ijobvr; + extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *); + real anrmto, bnrmto; + extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + real smlnum; + integer lwkopt; + logical lquery; + integer ihi, ilo; + real eps; + logical ilv; + + +/* -- LAPACK driver routine (version 3.6.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* January 2015 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alpha; + --beta; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1 * 1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1 * 1; + vr -= vr_offset; + --work; + --rwork; + + /* Function Body */ + if (lsame_(jobvl, "N")) { + ijobvl = 1; + ilvl = FALSE_; + } else if (lsame_(jobvl, "V")) { + ijobvl = 2; + ilvl = TRUE_; + } else { + ijobvl = -1; + ilvl = FALSE_; + } + + if (lsame_(jobvr, "N")) { + ijobvr = 1; + ilvr = FALSE_; + } else if (lsame_(jobvr, "V")) { + ijobvr = 2; + ilvr = TRUE_; + } else { + ijobvr = -1; + ilvr = FALSE_; + } + ilv = ilvl || ilvr; + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } else if (*ldvl < 1 || ilvl && *ldvl < *n) { + *info = -11; + } else if (*ldvr < 1 || ilvr && *ldvr < *n) { + *info = -13; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -15; + } + } + +/* Compute workspace */ + + if (*info == 0) { + cgeqrf_(n, n, &b[b_offset], ldb, &work[1], &work[1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = *n, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + cunmqr_("L", "C", n, n, n, &b[b_offset], ldb, &work[1], &a[a_offset], + lda, &work[1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + if (ilvl) { + cungqr_(n, n, n, &vl[vl_offset], ldvl, &work[1], &work[1], &c_n1, + &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + } + if (ilv) { + cgghd3_(jobvl, jobvr, n, &c__1, n, &a[a_offset], lda, &b[b_offset] + , ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &work[ + 1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + chgeqz_("S", jobvl, jobvr, n, &c__1, n, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, &work[1], &c_n1, &rwork[1], &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + } else { + cgghd3_("N", "N", n, &c__1, n, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &work[1], + &c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + chgeqz_("E", jobvl, jobvr, n, &c__1, n, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, &work[1], &c_n1, &rwork[1], &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n + (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + } + q__1.r = (real) lwkopt, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGGEV3 ", &i__1, (ftnlen)7); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = slamch_("E") * slamch_("B"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = clange_("M", n, n, &a[a_offset], lda, &rwork[1]); + ilascl = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + clascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = clange_("M", n, n, &b[b_offset], ldb, &rwork[1]); + ilbscl = FALSE_; + if (bnrm > 0.f && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + clascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrices A, B to isolate eigenvalues if possible */ + + ileft = 1; + iright = *n + 1; + irwrk = iright + *n; + cggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &rwork[ + ileft], &rwork[iright], &rwork[irwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ + + irows = ihi + 1 - ilo; + if (ilv) { + icols = *n + 1 - ilo; + } else { + icols = irows; + } + itau = 1; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + cgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to matrix A */ + + i__1 = *lwork + 1 - iwrk; + cunmqr_("L", "C", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VL */ + + if (ilvl) { + claset_("Full", n, n, &c_b1, &c_b2, &vl[vl_offset], ldvl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + clacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vl[ + ilo + 1 + ilo * vl_dim1], ldvl); + } + i__1 = *lwork + 1 - iwrk; + cungqr_(&irows, &irows, &irows, &vl[ilo + ilo * vl_dim1], ldvl, &work[ + itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VR */ + + if (ilvr) { + claset_("Full", n, n, &c_b1, &c_b2, &vr[vr_offset], ldvr); + } + +/* Reduce to generalized Hessenberg form */ + + if (ilv) { + +/* Eigenvectors requested -- work on whole matrix. */ + + i__1 = *lwork + 1 - iwrk; + cgghd3_(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &work[iwrk], + &i__1, &ierr); + } else { + i__1 = *lwork + 1 - iwrk; + cgghd3_("N", "N", &irows, &c__1, &irows, &a[ilo + ilo * a_dim1], lda, + &b[ilo + ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, &ierr); + } + +/* Perform QZ algorithm (Compute eigenvalues, and optionally, the */ +/* Schur form and Schur vectors) */ + + iwrk = itau; + if (ilv) { + *(unsigned char *)chtemp = 'S'; + } else { + *(unsigned char *)chtemp = 'E'; + } + i__1 = *lwork + 1 - iwrk; + chgeqz_(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alpha[1], &beta[1], &vl[vl_offset], ldvl, &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, &rwork[irwrk], &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L70; + } + +/* Compute Eigenvectors */ + + if (ilv) { + if (ilvl) { + if (ilvr) { + *(unsigned char *)chtemp = 'B'; + } else { + *(unsigned char *)chtemp = 'L'; + } + } else { + *(unsigned char *)chtemp = 'R'; + } + + ctgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, + &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[ + iwrk], &rwork[irwrk], &ierr); + if (ierr != 0) { + *info = *n + 2; + goto L70; + } + +/* Undo balancing on VL and VR and normalization */ + + if (ilvl) { + cggbak_("P", "L", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, + &vl[vl_offset], ldvl, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + temp = 0.f; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + i__3 = jr + jc * vl_dim1; + r__3 = temp, r__4 = (r__1 = vl[i__3].r, abs(r__1)) + ( + r__2 = r_imag(&vl[jr + jc * vl_dim1]), abs(r__2)); + temp = f2cmax(r__3,r__4); +/* L10: */ + } + if (temp < smlnum) { + goto L30; + } + temp = 1.f / temp; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + i__3 = jr + jc * vl_dim1; + i__4 = jr + jc * vl_dim1; + q__1.r = temp * vl[i__4].r, q__1.i = temp * vl[i__4].i; + vl[i__3].r = q__1.r, vl[i__3].i = q__1.i; +/* L20: */ + } +L30: + ; + } + } + if (ilvr) { + cggbak_("P", "R", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, + &vr[vr_offset], ldvr, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + temp = 0.f; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + i__3 = jr + jc * vr_dim1; + r__3 = temp, r__4 = (r__1 = vr[i__3].r, abs(r__1)) + ( + r__2 = r_imag(&vr[jr + jc * vr_dim1]), abs(r__2)); + temp = f2cmax(r__3,r__4); +/* L40: */ + } + if (temp < smlnum) { + goto L60; + } + temp = 1.f / temp; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + i__3 = jr + jc * vr_dim1; + i__4 = jr + jc * vr_dim1; + q__1.r = temp * vr[i__4].r, q__1.i = temp * vr[i__4].i; + vr[i__3].r = q__1.r, vr[i__3].i = q__1.i; +/* L50: */ + } +L60: + ; + } + } + } + +/* Undo scaling if necessary */ + +L70: + + if (ilascl) { + clascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, & + ierr); + } + + if (ilbscl) { + clascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + q__1.r = (real) lwkopt, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + return 0; + +/* End of CGGEV3 */ + +} /* cggev3_ */ + diff --git a/lapack-netlib/SRC/cggevx.c b/lapack-netlib/SRC/cggevx.c new file mode 100644 index 000000000..adca864e1 --- /dev/null +++ b/lapack-netlib/SRC/cggevx.c @@ -0,0 +1,1306 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CGGEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE mat +rices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGGEVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB, */ +/* ALPHA, BETA, VL, LDVL, VR, LDVR, ILO, IHI, */ +/* LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, RCONDV, */ +/* WORK, LWORK, RWORK, IWORK, BWORK, INFO ) */ + +/* CHARACTER BALANC, JOBVL, JOBVR, SENSE */ +/* INTEGER IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N */ +/* REAL ABNRM, BBNRM */ +/* LOGICAL BWORK( * ) */ +/* INTEGER IWORK( * ) */ +/* REAL LSCALE( * ), RCONDE( * ), RCONDV( * ), */ +/* $ RSCALE( * ), RWORK( * ) */ +/* COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), */ +/* $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), */ +/* $ WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGGEVX computes for a pair of N-by-N complex nonsymmetric matrices */ +/* > (A,B) the generalized eigenvalues, and optionally, the left and/or */ +/* > right generalized eigenvectors. */ +/* > */ +/* > Optionally, it also computes a balancing transformation to improve */ +/* > the conditioning of the eigenvalues and eigenvectors (ILO, IHI, */ +/* > LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for */ +/* > the eigenvalues (RCONDE), and reciprocal condition numbers for the */ +/* > right eigenvectors (RCONDV). */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar */ +/* > lambda or a ratio alpha/beta = lambda, such that A - lambda*B is */ +/* > singular. It is usually represented as the pair (alpha,beta), as */ +/* > there is a reasonable interpretation for beta=0, and even for both */ +/* > being zero. */ +/* > */ +/* > The right eigenvector v(j) corresponding to the eigenvalue lambda(j) */ +/* > of (A,B) satisfies */ +/* > A * v(j) = lambda(j) * B * v(j) . */ +/* > The left eigenvector u(j) corresponding to the eigenvalue lambda(j) */ +/* > of (A,B) satisfies */ +/* > u(j)**H * A = lambda(j) * u(j)**H * B. */ +/* > where u(j)**H is the conjugate-transpose of u(j). */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] BALANC */ +/* > \verbatim */ +/* > BALANC is CHARACTER*1 */ +/* > Specifies the balance option to be performed: */ +/* > = 'N': do not diagonally scale or permute; */ +/* > = 'P': permute only; */ +/* > = 'S': scale only; */ +/* > = 'B': both permute and scale. */ +/* > Computed reciprocal condition numbers will be for the */ +/* > matrices after permuting and/or balancing. Permuting does */ +/* > not change condition numbers (in exact arithmetic), but */ +/* > balancing does. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVL */ +/* > \verbatim */ +/* > JOBVL is CHARACTER*1 */ +/* > = 'N': do not compute the left generalized eigenvectors; */ +/* > = 'V': compute the left generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVR */ +/* > \verbatim */ +/* > JOBVR is CHARACTER*1 */ +/* > = 'N': do not compute the right generalized eigenvectors; */ +/* > = 'V': compute the right generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SENSE */ +/* > \verbatim */ +/* > SENSE is CHARACTER*1 */ +/* > Determines which reciprocal condition numbers are computed. */ +/* > = 'N': none are computed; */ +/* > = 'E': computed for eigenvalues only; */ +/* > = 'V': computed for eigenvectors only; */ +/* > = 'B': computed for eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VL, and VR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the matrix A in the pair (A,B). */ +/* > On exit, A has been overwritten. If JOBVL='V' or JOBVR='V' */ +/* > or both, then A contains the first part of the complex Schur */ +/* > form of the "balanced" versions of the input A and B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB, N) */ +/* > On entry, the matrix B in the pair (A,B). */ +/* > On exit, B has been overwritten. If JOBVL='V' or JOBVR='V' */ +/* > or both, then B contains the second part of the complex */ +/* > Schur form of the "balanced" versions of the input A and B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is COMPLEX array, dimension (N) */ +/* > On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the generalized */ +/* > eigenvalues. */ +/* > */ +/* > Note: the quotient ALPHA(j)/BETA(j) ) may easily over- or */ +/* > underflow, and BETA(j) may even be zero. Thus, the user */ +/* > should avoid naively computing the ratio ALPHA/BETA. */ +/* > However, ALPHA will be always less than and usually */ +/* > comparable with norm(A) in magnitude, and BETA always less */ +/* > than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VL */ +/* > \verbatim */ +/* > VL is COMPLEX array, dimension (LDVL,N) */ +/* > If JOBVL = 'V', the left generalized eigenvectors u(j) are */ +/* > stored one after another in the columns of VL, in the same */ +/* > order as their eigenvalues. */ +/* > Each eigenvector will be scaled so the largest component */ +/* > will have abs(real part) + abs(imag. part) = 1. */ +/* > Not referenced if JOBVL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVL */ +/* > \verbatim */ +/* > LDVL is INTEGER */ +/* > The leading dimension of the matrix VL. LDVL >= 1, and */ +/* > if JOBVL = 'V', LDVL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VR */ +/* > \verbatim */ +/* > VR is COMPLEX array, dimension (LDVR,N) */ +/* > If JOBVR = 'V', the right generalized eigenvectors v(j) are */ +/* > stored one after another in the columns of VR, in the same */ +/* > order as their eigenvalues. */ +/* > Each eigenvector will be scaled so the largest component */ +/* > will have abs(real part) + abs(imag. part) = 1. */ +/* > Not referenced if JOBVR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVR */ +/* > \verbatim */ +/* > LDVR is INTEGER */ +/* > The leading dimension of the matrix VR. LDVR >= 1, and */ +/* > if JOBVR = 'V', LDVR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > ILO and IHI are integer values such that on exit */ +/* > A(i,j) = 0 and B(i,j) = 0 if i > j and */ +/* > j = 1,...,ILO-1 or i = IHI+1,...,N. */ +/* > If BALANC = 'N' or 'S', ILO = 1 and IHI = N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] LSCALE */ +/* > \verbatim */ +/* > LSCALE is REAL array, dimension (N) */ +/* > Details of the permutations and scaling factors applied */ +/* > to the left side of A and B. If PL(j) is the index of the */ +/* > row interchanged with row j, and DL(j) is the scaling */ +/* > factor applied to row j, then */ +/* > LSCALE(j) = PL(j) for j = 1,...,ILO-1 */ +/* > = DL(j) for j = ILO,...,IHI */ +/* > = PL(j) for j = IHI+1,...,N. */ +/* > The order in which the interchanges are made is N to IHI+1, */ +/* > then 1 to ILO-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RSCALE */ +/* > \verbatim */ +/* > RSCALE is REAL array, dimension (N) */ +/* > Details of the permutations and scaling factors applied */ +/* > to the right side of A and B. If PR(j) is the index of the */ +/* > column interchanged with column j, and DR(j) is the scaling */ +/* > factor applied to column j, then */ +/* > RSCALE(j) = PR(j) for j = 1,...,ILO-1 */ +/* > = DR(j) for j = ILO,...,IHI */ +/* > = PR(j) for j = IHI+1,...,N */ +/* > The order in which the interchanges are made is N to IHI+1, */ +/* > then 1 to ILO-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ABNRM */ +/* > \verbatim */ +/* > ABNRM is REAL */ +/* > The one-norm of the balanced matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BBNRM */ +/* > \verbatim */ +/* > BBNRM is REAL */ +/* > The one-norm of the balanced matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDE */ +/* > \verbatim */ +/* > RCONDE is REAL array, dimension (N) */ +/* > If SENSE = 'E' or 'B', the reciprocal condition numbers of */ +/* > the eigenvalues, stored in consecutive elements of the array. */ +/* > If SENSE = 'N' or 'V', RCONDE is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDV */ +/* > \verbatim */ +/* > RCONDV is REAL array, dimension (N) */ +/* > If SENSE = 'V' or 'B', the estimated reciprocal condition */ +/* > numbers of the eigenvectors, stored in consecutive elements */ +/* > of the array. If the eigenvalues cannot be reordered to */ +/* > compute RCONDV(j), RCONDV(j) is set to 0; this can only occur */ +/* > when the true value would be very small anyway. */ +/* > If SENSE = 'N' or 'E', RCONDV is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,2*N). */ +/* > If SENSE = 'E', LWORK >= f2cmax(1,4*N). */ +/* > If SENSE = 'V' or 'B', LWORK >= f2cmax(1,2*N*N+2*N). */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (lrwork) */ +/* > lrwork must be at least f2cmax(1,6*N) if BALANC = 'S' or 'B', */ +/* > and at least f2cmax(1,2*N) otherwise. */ +/* > Real workspace. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N+2) */ +/* > If SENSE = 'E', IWORK is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BWORK */ +/* > \verbatim */ +/* > BWORK is LOGICAL array, dimension (N) */ +/* > If SENSE = 'N', BWORK is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > = 1,...,N: */ +/* > The QZ iteration failed. No eigenvectors have been */ +/* > calculated, but ALPHA(j) and BETA(j) should be correct */ +/* > for j=INFO+1,...,N. */ +/* > > N: =N+1: other than QZ iteration failed in CHGEQZ. */ +/* > =N+2: error return from CTGEVC. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup complexGEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Balancing a matrix pair (A,B) includes, first, permuting rows and */ +/* > columns to isolate eigenvalues, second, applying diagonal similarity */ +/* > transformation to the rows and columns to make the rows and columns */ +/* > as close in norm as possible. The computed reciprocal condition */ +/* > numbers correspond to the balanced matrix. Permuting rows and columns */ +/* > will not change the condition numbers (in exact arithmetic) but */ +/* > diagonal scaling will. For further explanation of balancing, see */ +/* > section 4.11.1.2 of LAPACK Users' Guide. */ +/* > */ +/* > An approximate error bound on the chordal distance between the i-th */ +/* > computed generalized eigenvalue w and the corresponding exact */ +/* > eigenvalue lambda is */ +/* > */ +/* > chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I) */ +/* > */ +/* > An approximate error bound for the angle between the i-th computed */ +/* > eigenvector VL(i) or VR(i) is given by */ +/* > */ +/* > EPS * norm(ABNRM, BBNRM) / DIF(i). */ +/* > */ +/* > For further explanation of the reciprocal condition numbers RCONDE */ +/* > and RCONDV, see section 4.11 of LAPACK User's Guide. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int cggevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, complex *a, integer *lda, complex *b, integer *ldb, + complex *alpha, complex *beta, complex *vl, integer *ldvl, complex * + vr, integer *ldvr, integer *ilo, integer *ihi, real *lscale, real * + rscale, real *abnrm, real *bbnrm, real *rconde, real *rcondv, complex + *work, integer *lwork, real *rwork, integer *iwork, logical *bwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, + vr_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3, r__4; + complex q__1; + + /* Local variables */ + real anrm, bnrm; + integer ierr, itau; + real temp; + logical ilvl, ilvr; + integer iwrk, iwrk1, i__, j, m; + extern logical lsame_(char *, char *); + integer icols; + logical noscl; + integer irows, jc; + extern /* Subroutine */ int cggbak_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, complex *, integer *, + integer *), cggbal_(char *, integer *, complex *, + integer *, complex *, integer *, integer *, integer *, real *, + real *, real *, integer *), slabad_(real *, real *); + integer in; + extern real clange_(char *, integer *, integer *, complex *, integer *, + real *); + integer jr; + extern /* Subroutine */ int cgghrd_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *, integer *), + clascl_(char *, integer *, integer *, real *, real *, integer *, + integer *, complex *, integer *, integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *, integer *), clacpy_( + char *, integer *, integer *, complex *, integer *, complex *, + integer *), claset_(char *, integer *, integer *, complex + *, complex *, complex *, integer *); + logical ldumma[1]; + char chtemp[1]; + real bignum; + extern /* Subroutine */ int chgeqz_(char *, char *, char *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + complex *, complex *, complex *, integer *, complex *, integer *, + complex *, integer *, real *, integer *), + ctgevc_(char *, char *, logical *, integer *, complex *, integer * + , complex *, integer *, complex *, integer *, complex *, integer * + , integer *, integer *, complex *, real *, integer *); + integer ijobvl; + extern /* Subroutine */ int ctgsna_(char *, char *, logical *, integer *, + complex *, integer *, complex *, integer *, complex *, integer *, + complex *, integer *, real *, real *, integer *, integer *, + complex *, integer *, integer *, integer *), + slascl_(char *, integer *, integer *, real *, real *, integer *, + integer *, real *, integer *, integer *), xerbla_(char *, + integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern real slamch_(char *); + integer ijobvr; + logical wantsb; + extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *); + real anrmto; + logical wantse; + real bnrmto; + extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + integer minwrk, maxwrk; + logical wantsn; + real smlnum; + logical lquery, wantsv; + real eps; + logical ilv; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alpha; + --beta; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1 * 1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1 * 1; + vr -= vr_offset; + --lscale; + --rscale; + --rconde; + --rcondv; + --work; + --rwork; + --iwork; + --bwork; + + /* Function Body */ + if (lsame_(jobvl, "N")) { + ijobvl = 1; + ilvl = FALSE_; + } else if (lsame_(jobvl, "V")) { + ijobvl = 2; + ilvl = TRUE_; + } else { + ijobvl = -1; + ilvl = FALSE_; + } + + if (lsame_(jobvr, "N")) { + ijobvr = 1; + ilvr = FALSE_; + } else if (lsame_(jobvr, "V")) { + ijobvr = 2; + ilvr = TRUE_; + } else { + ijobvr = -1; + ilvr = FALSE_; + } + ilv = ilvl || ilvr; + + noscl = lsame_(balanc, "N") || lsame_(balanc, "P"); + wantsn = lsame_(sense, "N"); + wantse = lsame_(sense, "E"); + wantsv = lsame_(sense, "V"); + wantsb = lsame_(sense, "B"); + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (! (noscl || lsame_(balanc, "S") || lsame_( + balanc, "B"))) { + *info = -1; + } else if (ijobvl <= 0) { + *info = -2; + } else if (ijobvr <= 0) { + *info = -3; + } else if (! (wantsn || wantse || wantsb || wantsv)) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldvl < 1 || ilvl && *ldvl < *n) { + *info = -13; + } else if (*ldvr < 1 || ilvr && *ldvr < *n) { + *info = -15; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. The workspace is */ +/* computed assuming ILO = 1 and IHI = N, the worst case.) */ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + minwrk = *n << 1; + if (wantse) { + minwrk = *n << 2; + } else if (wantsv || wantsb) { + minwrk = (*n << 1) * (*n + 1); + } + maxwrk = minwrk; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", n, & + c__1, n, &c__0, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "CUNMQR", " ", n, & + c__1, n, &c__0, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); + if (ilvl) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR", + " ", n, &c__1, n, &c__0, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); + } + } + work[1].r = (real) maxwrk, work[1].i = 0.f; + + if (*lwork < minwrk && ! lquery) { + *info = -25; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGGEVX", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = clange_("M", n, n, &a[a_offset], lda, &rwork[1]); + ilascl = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + clascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = clange_("M", n, n, &b[b_offset], ldb, &rwork[1]); + ilbscl = FALSE_; + if (bnrm > 0.f && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + clascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute and/or balance the matrix pair (A,B) */ +/* (Real Workspace: need 6*N if BALANC = 'S' or 'B', 1 otherwise) */ + + cggbal_(balanc, n, &a[a_offset], lda, &b[b_offset], ldb, ilo, ihi, & + lscale[1], &rscale[1], &rwork[1], &ierr); + +/* Compute ABNRM and BBNRM */ + + *abnrm = clange_("1", n, n, &a[a_offset], lda, &rwork[1]); + if (ilascl) { + rwork[1] = *abnrm; + slascl_("G", &c__0, &c__0, &anrmto, &anrm, &c__1, &c__1, &rwork[1], & + c__1, &ierr); + *abnrm = rwork[1]; + } + + *bbnrm = clange_("1", n, n, &b[b_offset], ldb, &rwork[1]); + if (ilbscl) { + rwork[1] = *bbnrm; + slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, &c__1, &c__1, &rwork[1], & + c__1, &ierr); + *bbnrm = rwork[1]; + } + +/* Reduce B to triangular form (QR decomposition of B) */ +/* (Complex Workspace: need N, prefer N*NB ) */ + + irows = *ihi + 1 - *ilo; + if (ilv || ! wantsn) { + icols = *n + 1 - *ilo; + } else { + icols = irows; + } + itau = 1; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + cgeqrf_(&irows, &icols, &b[*ilo + *ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the unitary transformation to A */ +/* (Complex Workspace: need N, prefer N*NB) */ + + i__1 = *lwork + 1 - iwrk; + cunmqr_("L", "C", &irows, &icols, &irows, &b[*ilo + *ilo * b_dim1], ldb, & + work[itau], &a[*ilo + *ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VL and/or VR */ +/* (Workspace: need N, prefer N*NB) */ + + if (ilvl) { + claset_("Full", n, n, &c_b1, &c_b2, &vl[vl_offset], ldvl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + clacpy_("L", &i__1, &i__2, &b[*ilo + 1 + *ilo * b_dim1], ldb, &vl[ + *ilo + 1 + *ilo * vl_dim1], ldvl); + } + i__1 = *lwork + 1 - iwrk; + cungqr_(&irows, &irows, &irows, &vl[*ilo + *ilo * vl_dim1], ldvl, & + work[itau], &work[iwrk], &i__1, &ierr); + } + + if (ilvr) { + claset_("Full", n, n, &c_b1, &c_b2, &vr[vr_offset], ldvr); + } + +/* Reduce to generalized Hessenberg form */ +/* (Workspace: none needed) */ + + if (ilv || ! wantsn) { + +/* Eigenvectors requested -- work on whole matrix. */ + + cgghrd_(jobvl, jobvr, n, ilo, ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr); + } else { + cgghrd_("N", "N", &irows, &c__1, &irows, &a[*ilo + *ilo * a_dim1], + lda, &b[*ilo + *ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[ + vr_offset], ldvr, &ierr); + } + +/* Perform QZ algorithm (Compute eigenvalues, and optionally, the */ +/* Schur forms and Schur vectors) */ +/* (Complex Workspace: need N) */ +/* (Real Workspace: need N) */ + + iwrk = itau; + if (ilv || ! wantsn) { + *(unsigned char *)chtemp = 'S'; + } else { + *(unsigned char *)chtemp = 'E'; + } + + i__1 = *lwork + 1 - iwrk; + chgeqz_(chtemp, jobvl, jobvr, n, ilo, ihi, &a[a_offset], lda, &b[b_offset] + , ldb, &alpha[1], &beta[1], &vl[vl_offset], ldvl, &vr[vr_offset], + ldvr, &work[iwrk], &i__1, &rwork[1], &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L90; + } + +/* Compute Eigenvectors and estimate condition numbers if desired */ +/* CTGEVC: (Complex Workspace: need 2*N ) */ +/* (Real Workspace: need 2*N ) */ +/* CTGSNA: (Complex Workspace: need 2*N*N if SENSE='V' or 'B') */ +/* (Integer Workspace: need N+2 ) */ + + if (ilv || ! wantsn) { + if (ilv) { + if (ilvl) { + if (ilvr) { + *(unsigned char *)chtemp = 'B'; + } else { + *(unsigned char *)chtemp = 'L'; + } + } else { + *(unsigned char *)chtemp = 'R'; + } + + ctgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, & + work[iwrk], &rwork[1], &ierr); + if (ierr != 0) { + *info = *n + 2; + goto L90; + } + } + + if (! wantsn) { + +/* compute eigenvectors (STGEVC) and estimate condition */ +/* numbers (STGSNA). Note that the definition of the condition */ +/* number is not invariant under transformation (u,v) to */ +/* (Q*u, Z*v), where (u,v) are eigenvectors of the generalized */ +/* Schur form (S,T), Q and Z are orthogonal matrices. In order */ +/* to avoid using extra 2*N*N workspace, we have to */ +/* re-calculate eigenvectors and estimate the condition numbers */ +/* one at a time. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + bwork[j] = FALSE_; +/* L10: */ + } + bwork[i__] = TRUE_; + + iwrk = *n + 1; + iwrk1 = iwrk + *n; + + if (wantse || wantsb) { + ctgevc_("B", "S", &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &work[1], n, &work[iwrk], n, & + c__1, &m, &work[iwrk1], &rwork[1], &ierr); + if (ierr != 0) { + *info = *n + 2; + goto L90; + } + } + + i__2 = *lwork - iwrk1 + 1; + ctgsna_(sense, "S", &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &work[1], n, &work[iwrk], n, &rconde[ + i__], &rcondv[i__], &c__1, &m, &work[iwrk1], &i__2, & + iwork[1], &ierr); + +/* L20: */ + } + } + } + +/* Undo balancing on VL and VR and normalization */ +/* (Workspace: none needed) */ + + if (ilvl) { + cggbak_(balanc, "L", n, ilo, ihi, &lscale[1], &rscale[1], n, &vl[ + vl_offset], ldvl, &ierr); + + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + temp = 0.f; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + i__3 = jr + jc * vl_dim1; + r__3 = temp, r__4 = (r__1 = vl[i__3].r, abs(r__1)) + (r__2 = + r_imag(&vl[jr + jc * vl_dim1]), abs(r__2)); + temp = f2cmax(r__3,r__4); +/* L30: */ + } + if (temp < smlnum) { + goto L50; + } + temp = 1.f / temp; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + i__3 = jr + jc * vl_dim1; + i__4 = jr + jc * vl_dim1; + q__1.r = temp * vl[i__4].r, q__1.i = temp * vl[i__4].i; + vl[i__3].r = q__1.r, vl[i__3].i = q__1.i; +/* L40: */ + } +L50: + ; + } + } + + if (ilvr) { + cggbak_(balanc, "R", n, ilo, ihi, &lscale[1], &rscale[1], n, &vr[ + vr_offset], ldvr, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + temp = 0.f; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + i__3 = jr + jc * vr_dim1; + r__3 = temp, r__4 = (r__1 = vr[i__3].r, abs(r__1)) + (r__2 = + r_imag(&vr[jr + jc * vr_dim1]), abs(r__2)); + temp = f2cmax(r__3,r__4); +/* L60: */ + } + if (temp < smlnum) { + goto L80; + } + temp = 1.f / temp; + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + i__3 = jr + jc * vr_dim1; + i__4 = jr + jc * vr_dim1; + q__1.r = temp * vr[i__4].r, q__1.i = temp * vr[i__4].i; + vr[i__3].r = q__1.r, vr[i__3].i = q__1.i; +/* L70: */ + } +L80: + ; + } + } + +/* Undo scaling if necessary */ + +L90: + + if (ilascl) { + clascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, & + ierr); + } + + if (ilbscl) { + clascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + work[1].r = (real) maxwrk, work[1].i = 0.f; + return 0; + +/* End of CGGEVX */ + +} /* cggevx_ */ + diff --git a/lapack-netlib/SRC/cggglm.c b/lapack-netlib/SRC/cggglm.c new file mode 100644 index 000000000..cc392b25e --- /dev/null +++ b/lapack-netlib/SRC/cggglm.c @@ -0,0 +1,798 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CGGGLM */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGGGLM + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, P */ +/* COMPLEX A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), */ +/* $ X( * ), Y( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGGGLM solves a general Gauss-Markov linear model (GLM) problem: */ +/* > */ +/* > minimize || y ||_2 subject to d = A*x + B*y */ +/* > x */ +/* > */ +/* > where A is an N-by-M matrix, B is an N-by-P matrix, and d is a */ +/* > given N-vector. It is assumed that M <= N <= M+P, and */ +/* > */ +/* > rank(A) = M and rank( A B ) = N. */ +/* > */ +/* > Under these assumptions, the constrained equation is always */ +/* > consistent, and there is a unique solution x and a minimal 2-norm */ +/* > solution y, which is obtained using a generalized QR factorization */ +/* > of the matrices (A, B) given by */ +/* > */ +/* > A = Q*(R), B = Q*T*Z. */ +/* > (0) */ +/* > */ +/* > In particular, if matrix B is square nonsingular, then the problem */ +/* > GLM is equivalent to the following weighted linear least squares */ +/* > problem */ +/* > */ +/* > minimize || inv(B)*(d-A*x) ||_2 */ +/* > x */ +/* > */ +/* > where inv(B) denotes the inverse of B. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of rows of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of columns of the matrix A. 0 <= M <= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of columns of the matrix B. P >= N-M. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,M) */ +/* > On entry, the N-by-M matrix A. */ +/* > On exit, the upper triangular part of the array A contains */ +/* > the M-by-M upper triangular matrix R. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,P) */ +/* > On entry, the N-by-P matrix B. */ +/* > On exit, if N <= P, the upper triangle of the subarray */ +/* > B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */ +/* > if N > P, the elements on and above the (N-P)th subdiagonal */ +/* > contain the N-by-P upper trapezoidal matrix T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is COMPLEX array, dimension (N) */ +/* > On entry, D is the left hand side of the GLM equation. */ +/* > On exit, D is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is COMPLEX array, dimension (M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Y */ +/* > \verbatim */ +/* > Y is COMPLEX array, dimension (P) */ +/* > */ +/* > On exit, X and Y are the solutions of the GLM problem. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N+M+P). */ +/* > For optimum performance, LWORK >= M+f2cmin(N,P)+f2cmax(N,P)*NB, */ +/* > where NB is an upper bound for the optimal blocksizes for */ +/* > CGEQRF, CGERQF, CUNMQR and CUNMRQ. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > = 1: the upper triangular factor R associated with A in the */ +/* > generalized QR factorization of the pair (A, B) is */ +/* > singular, so that rank(A) < M; the least squares */ +/* > solution could not be computed. */ +/* > = 2: the bottom (N-M) by (N-M) part of the upper trapezoidal */ +/* > factor T associated with B in the generalized QR */ +/* > factorization of the pair (A, B) is singular, so that */ +/* > rank( A B ) < N; the least squares solution could not */ +/* > be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexOTHEReigen */ + +/* ===================================================================== */ +/* Subroutine */ int cggglm_(integer *n, integer *m, integer *p, complex *a, + integer *lda, complex *b, integer *ldb, complex *d__, complex *x, + complex *y, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + complex q__1; + + /* Local variables */ + integer lopt, i__; + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *), ccopy_(integer *, complex *, integer *, + complex *, integer *); + integer nb, np; + extern /* Subroutine */ int cggqrf_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, complex *, + complex *, integer *, integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer lwkmin, nb1, nb2, nb3, nb4; + extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *), cunmrq_(char *, + char *, integer *, integer *, integer *, complex *, integer *, + complex *, complex *, integer *, complex *, integer *, integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int ctrtrs_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* =================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --d__; + --x; + --y; + --work; + + /* Function Body */ + *info = 0; + np = f2cmin(*n,*p); + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*m < 0 || *m > *n) { + *info = -2; + } else if (*p < 0 || *p < *n - *m) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } + +/* Calculate workspace */ + + if (*info == 0) { + if (*n == 0) { + lwkmin = 1; + lwkopt = 1; + } else { + nb1 = ilaenv_(&c__1, "CGEQRF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nb2 = ilaenv_(&c__1, "CGERQF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nb3 = ilaenv_(&c__1, "CUNMQR", " ", n, m, p, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb4 = ilaenv_(&c__1, "CUNMRQ", " ", n, m, p, &c_n1, (ftnlen)6, ( + ftnlen)1); +/* Computing MAX */ + i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3); + nb = f2cmax(i__1,nb4); + lwkmin = *m + *n + *p; + lwkopt = *m + np + f2cmax(*n,*p) * nb; + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + + if (*lwork < lwkmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGGGLM", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + x[i__2].r = 0.f, x[i__2].i = 0.f; + } + i__1 = *p; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0.f, y[i__2].i = 0.f; + } + return 0; + } + +/* Compute the GQR factorization of matrices A and B: */ + +/* Q**H*A = ( R11 ) M, Q**H*B*Z**H = ( T11 T12 ) M */ +/* ( 0 ) N-M ( 0 T22 ) N-M */ +/* M M+P-N N-M */ + +/* where R11 and T22 are upper triangular, and Q and Z are */ +/* unitary. */ + + i__1 = *lwork - *m - np; + cggqrf_(n, m, p, &a[a_offset], lda, &work[1], &b[b_offset], ldb, &work[*m + + 1], &work[*m + np + 1], &i__1, info); + i__1 = *m + np + 1; + lopt = work[i__1].r; + +/* Update left-hand-side vector d = Q**H*d = ( d1 ) M */ +/* ( d2 ) N-M */ + + i__1 = f2cmax(1,*n); + i__2 = *lwork - *m - np; + cunmqr_("Left", "Conjugate transpose", n, &c__1, m, &a[a_offset], lda, & + work[1], &d__[1], &i__1, &work[*m + np + 1], &i__2, info); +/* Computing MAX */ + i__3 = *m + np + 1; + i__1 = lopt, i__2 = (integer) work[i__3].r; + lopt = f2cmax(i__1,i__2); + +/* Solve T22*y2 = d2 for y2 */ + + if (*n > *m) { + i__1 = *n - *m; + i__2 = *n - *m; + ctrtrs_("Upper", "No transpose", "Non unit", &i__1, &c__1, &b[*m + 1 + + (*m + *p - *n + 1) * b_dim1], ldb, &d__[*m + 1], &i__2, + info); + + if (*info > 0) { + *info = 1; + return 0; + } + + i__1 = *n - *m; + ccopy_(&i__1, &d__[*m + 1], &c__1, &y[*m + *p - *n + 1], &c__1); + } + +/* Set y1 = 0 */ + + i__1 = *m + *p - *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0.f, y[i__2].i = 0.f; +/* L10: */ + } + +/* Update d1 = d1 - T12*y2 */ + + i__1 = *n - *m; + q__1.r = -1.f, q__1.i = 0.f; + cgemv_("No transpose", m, &i__1, &q__1, &b[(*m + *p - *n + 1) * b_dim1 + + 1], ldb, &y[*m + *p - *n + 1], &c__1, &c_b2, &d__[1], &c__1); + +/* Solve triangular system: R11*x = d1 */ + + if (*m > 0) { + ctrtrs_("Upper", "No Transpose", "Non unit", m, &c__1, &a[a_offset], + lda, &d__[1], m, info); + + if (*info > 0) { + *info = 2; + return 0; + } + +/* Copy D to X */ + + ccopy_(m, &d__[1], &c__1, &x[1], &c__1); + } + +/* Backward transformation y = Z**H *y */ + +/* Computing MAX */ + i__1 = 1, i__2 = *n - *p + 1; + i__3 = f2cmax(1,*p); + i__4 = *lwork - *m - np; + cunmrq_("Left", "Conjugate transpose", p, &c__1, &np, &b[f2cmax(i__1,i__2) + + b_dim1], ldb, &work[*m + 1], &y[1], &i__3, &work[*m + np + 1], & + i__4, info); +/* Computing MAX */ + i__4 = *m + np + 1; + i__2 = lopt, i__3 = (integer) work[i__4].r; + i__1 = *m + np + f2cmax(i__2,i__3); + work[1].r = (real) i__1, work[1].i = 0.f; + + return 0; + +/* End of CGGGLM */ + +} /* cggglm_ */ + diff --git a/lapack-netlib/SRC/cgghd3.c b/lapack-netlib/SRC/cgghd3.c new file mode 100644 index 000000000..5f5b4bbcd --- /dev/null +++ b/lapack-netlib/SRC/cgghd3.c @@ -0,0 +1,1644 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CGGHD3 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGGHD3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, */ +/* $ LDQ, Z, LDZ, WORK, LWORK, INFO ) */ + +/* CHARACTER COMPQ, COMPZ */ +/* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK */ +/* COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */ +/* $ Z( LDZ, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > */ +/* > CGGHD3 reduces a pair of complex matrices (A,B) to generalized upper */ +/* > Hessenberg form using unitary transformations, where A is a */ +/* > general matrix and B is upper triangular. The form of the */ +/* > generalized eigenvalue problem is */ +/* > A*x = lambda*B*x, */ +/* > and B is typically made upper triangular by computing its QR */ +/* > factorization and moving the unitary matrix Q to the left side */ +/* > of the equation. */ +/* > */ +/* > This subroutine simultaneously reduces A to a Hessenberg matrix H: */ +/* > Q**H*A*Z = H */ +/* > and transforms B to another upper triangular matrix T: */ +/* > Q**H*B*Z = T */ +/* > in order to reduce the problem to its standard form */ +/* > H*y = lambda*T*y */ +/* > where y = Z**H*x. */ +/* > */ +/* > The unitary matrices Q and Z are determined as products of Givens */ +/* > rotations. They may either be formed explicitly, or they may be */ +/* > postmultiplied into input matrices Q1 and Z1, so that */ +/* > */ +/* > Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H */ +/* > */ +/* > Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H */ +/* > */ +/* > If Q1 is the unitary matrix from the QR factorization of B in the */ +/* > original equation A*x = lambda*B*x, then CGGHD3 reduces the original */ +/* > problem to generalized Hessenberg form. */ +/* > */ +/* > This is a blocked variant of CGGHRD, using matrix-matrix */ +/* > multiplications for parts of the computation to enhance performance. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] COMPQ */ +/* > \verbatim */ +/* > COMPQ is CHARACTER*1 */ +/* > = 'N': do not compute Q; */ +/* > = 'I': Q is initialized to the unit matrix, and the */ +/* > unitary matrix Q is returned; */ +/* > = 'V': Q must contain a unitary matrix Q1 on entry, */ +/* > and the product Q1*Q is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] COMPZ */ +/* > \verbatim */ +/* > COMPZ is CHARACTER*1 */ +/* > = 'N': do not compute Z; */ +/* > = 'I': Z is initialized to the unit matrix, and the */ +/* > unitary matrix Z is returned; */ +/* > = 'V': Z must contain a unitary matrix Z1 on entry, */ +/* > and the product Z1*Z is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > */ +/* > ILO and IHI mark the rows and columns of A which are to be */ +/* > reduced. It is assumed that A is already upper triangular */ +/* > in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are */ +/* > normally set by a previous call to CGGBAL; otherwise they */ +/* > should be set to 1 and N respectively. */ +/* > 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the N-by-N general matrix to be reduced. */ +/* > On exit, the upper triangle and the first subdiagonal of A */ +/* > are overwritten with the upper Hessenberg matrix H, and the */ +/* > rest is set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB, N) */ +/* > On entry, the N-by-N upper triangular matrix B. */ +/* > On exit, the upper triangular matrix T = Q**H B Z. The */ +/* > elements below the diagonal are set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX array, dimension (LDQ, N) */ +/* > On entry, if COMPQ = 'V', the unitary matrix Q1, typically */ +/* > from the QR factorization of B. */ +/* > On exit, if COMPQ='I', the unitary matrix Q, and if */ +/* > COMPQ = 'V', the product Q1*Q. */ +/* > Not referenced if COMPQ='N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. */ +/* > LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX array, dimension (LDZ, N) */ +/* > On entry, if COMPZ = 'V', the unitary matrix Z1. */ +/* > On exit, if COMPZ='I', the unitary matrix Z, and if */ +/* > COMPZ = 'V', the product Z1*Z. */ +/* > Not referenced if COMPZ='N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. */ +/* > LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (LWORK) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1. */ +/* > For optimum performance LWORK >= 6*N*NB, where NB is the */ +/* > optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date January 2015 */ + +/* > \ingroup complexOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > This routine reduces A to Hessenberg form and maintains B in */ +/* > using a blocked variant of Moler and Stewart's original algorithm, */ +/* > as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti */ +/* > (BIT 2008). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int cgghd3_(char *compq, char *compz, integer *n, integer * + ilo, integer *ihi, complex *a, integer *lda, complex *b, integer *ldb, + complex *q, integer *ldq, complex *z__, integer *ldz, complex *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, + z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9; + complex q__1, q__2, q__3, q__4; + + /* Local variables */ + logical blk22; + integer cola, jcol, ierr; + complex temp; + extern /* Subroutine */ int crot_(integer *, complex *, integer *, + complex *, integer *, real *, complex *); + integer jrow, topq, ppwo; + complex temp1, temp2, temp3; + real c__; + integer kacc22, i__, j, k; + complex s; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + integer nbmin; + extern /* Subroutine */ int cunm22_(char *, char *, integer *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + complex *, integer *, integer *); + complex ctemp; + integer nblst; + logical initq; + complex c1, c2; + logical wantq; + integer j0; + extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *, + complex *, integer *, complex *, integer *); + logical initz, wantz; + complex s1, s2; + char compq2[1], compz2[1]; + integer nb, jj, nh; + extern /* Subroutine */ int cgghrd_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *, integer *); + integer nx, pw; + extern /* Subroutine */ int claset_(char *, integer *, integer *, complex + *, complex *, complex *, integer *), clartg_(complex *, + complex *, real *, complex *, complex *), clacpy_(char *, integer + *, integer *, complex *, integer *, complex *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer lwkopt; + logical lquery; + integer nnb, len, top, ppw, n2nb; + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* January 2015 */ + + + + +/* ===================================================================== */ + + +/* Decode and test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "CGGHD3", " ", n, ilo, ihi, &c_n1, (ftnlen)6, (ftnlen) + 1); +/* Computing MAX */ + i__1 = *n * 6 * nb; + lwkopt = f2cmax(i__1,1); + q__1.r = (real) lwkopt, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + initq = lsame_(compq, "I"); + wantq = initq || lsame_(compq, "V"); + initz = lsame_(compz, "I"); + wantz = initz || lsame_(compz, "V"); + lquery = *lwork == -1; + + if (! lsame_(compq, "N") && ! wantq) { + *info = -1; + } else if (! lsame_(compz, "N") && ! wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1) { + *info = -4; + } else if (*ihi > *n || *ihi < *ilo - 1) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (wantq && *ldq < *n || *ldq < 1) { + *info = -11; + } else if (wantz && *ldz < *n || *ldz < 1) { + *info = -13; + } else if (*lwork < 1 && ! lquery) { + *info = -15; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGGHD3", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Initialize Q and Z if desired. */ + + if (initq) { + claset_("All", n, n, &c_b2, &c_b1, &q[q_offset], ldq); + } + if (initz) { + claset_("All", n, n, &c_b2, &c_b1, &z__[z_offset], ldz); + } + +/* Zero out lower triangle of B. */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + claset_("Lower", &i__1, &i__2, &c_b2, &c_b2, &b[b_dim1 + 2], ldb); + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + +/* Determine the blocksize. */ + + nbmin = ilaenv_(&c__2, "CGGHD3", " ", n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb > 1 && nb < nh) { + +/* Determine when to use unblocked instead of blocked code. */ + +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__3, "CGGHD3", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code. */ + + if (*lwork < lwkopt) { + +/* Not enough workspace to use optimal NB: determine the */ +/* minimum value of NB, and reduce NB or force use of */ +/* unblocked code. */ + +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "CGGHD3", " ", n, ilo, ihi, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + if (*lwork >= *n * 6 * nbmin) { + nb = *lwork / (*n * 6); + } else { + nb = 1; + } + } + } + } + + if (nb < nbmin || nb >= nh) { + +/* Use unblocked code below */ + + jcol = *ilo; + + } else { + +/* Use blocked code */ + + kacc22 = ilaenv_(&c__16, "CGGHD3", " ", n, ilo, ihi, &c_n1, (ftnlen)6, + (ftnlen)1); + blk22 = kacc22 == 2; + i__1 = *ihi - 2; + i__2 = nb; + for (jcol = *ilo; i__2 < 0 ? jcol >= i__1 : jcol <= i__1; jcol += + i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - jcol - 1; + nnb = f2cmin(i__3,i__4); + +/* Initialize small unitary factors that will hold the */ +/* accumulated Givens rotations in workspace. */ +/* N2NB denotes the number of 2*NNB-by-2*NNB factors */ +/* NBLST denotes the (possibly smaller) order of the last */ +/* factor. */ + + n2nb = (*ihi - jcol - 1) / nnb - 1; + nblst = *ihi - jcol - n2nb * nnb; + claset_("All", &nblst, &nblst, &c_b2, &c_b1, &work[1], &nblst); + pw = nblst * nblst + 1; + i__3 = n2nb; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = nnb << 1; + i__5 = nnb << 1; + i__6 = nnb << 1; + claset_("All", &i__4, &i__5, &c_b2, &c_b1, &work[pw], &i__6); + pw += (nnb << 2) * nnb; + } + +/* Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form. */ + + i__3 = jcol + nnb - 1; + for (j = jcol; j <= i__3; ++j) { + +/* Reduce Jth column of A. Store cosines and sines in Jth */ +/* column of A and B, respectively. */ + + i__4 = j + 2; + for (i__ = *ihi; i__ >= i__4; --i__) { + i__5 = i__ - 1 + j * a_dim1; + temp.r = a[i__5].r, temp.i = a[i__5].i; + clartg_(&temp, &a[i__ + j * a_dim1], &c__, &s, &a[i__ - 1 + + j * a_dim1]); + i__5 = i__ + j * a_dim1; + q__1.r = c__, q__1.i = 0.f; + a[i__5].r = q__1.r, a[i__5].i = q__1.i; + i__5 = i__ + j * b_dim1; + b[i__5].r = s.r, b[i__5].i = s.i; + } + +/* Accumulate Givens rotations into workspace array. */ + + ppw = (nblst + 1) * (nblst - 2) - j + jcol + 1; + len = j + 2 - jcol; + jrow = j + n2nb * nnb + 2; + i__4 = jrow; + for (i__ = *ihi; i__ >= i__4; --i__) { + i__5 = i__ + j * a_dim1; + ctemp.r = a[i__5].r, ctemp.i = a[i__5].i; + i__5 = i__ + j * b_dim1; + s.r = b[i__5].r, s.i = b[i__5].i; + i__5 = ppw + len - 1; + for (jj = ppw; jj <= i__5; ++jj) { + i__6 = jj + nblst; + temp.r = work[i__6].r, temp.i = work[i__6].i; + i__6 = jj + nblst; + q__2.r = ctemp.r * temp.r - ctemp.i * temp.i, q__2.i = + ctemp.r * temp.i + ctemp.i * temp.r; + i__7 = jj; + q__3.r = s.r * work[i__7].r - s.i * work[i__7].i, + q__3.i = s.r * work[i__7].i + s.i * work[i__7] + .r; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; + work[i__6].r = q__1.r, work[i__6].i = q__1.i; + i__6 = jj; + r_cnjg(&q__3, &s); + q__2.r = q__3.r * temp.r - q__3.i * temp.i, q__2.i = + q__3.r * temp.i + q__3.i * temp.r; + i__7 = jj; + q__4.r = ctemp.r * work[i__7].r - ctemp.i * work[i__7] + .i, q__4.i = ctemp.r * work[i__7].i + ctemp.i + * work[i__7].r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + work[i__6].r = q__1.r, work[i__6].i = q__1.i; + } + ++len; + ppw = ppw - nblst - 1; + } + + ppwo = nblst * nblst + (nnb + j - jcol - 1 << 1) * nnb + nnb; + j0 = jrow - nnb; + i__4 = j + 2; + i__5 = -nnb; + for (jrow = j0; i__5 < 0 ? jrow >= i__4 : jrow <= i__4; jrow + += i__5) { + ppw = ppwo; + len = j + 2 - jcol; + i__6 = jrow; + for (i__ = jrow + nnb - 1; i__ >= i__6; --i__) { + i__7 = i__ + j * a_dim1; + ctemp.r = a[i__7].r, ctemp.i = a[i__7].i; + i__7 = i__ + j * b_dim1; + s.r = b[i__7].r, s.i = b[i__7].i; + i__7 = ppw + len - 1; + for (jj = ppw; jj <= i__7; ++jj) { + i__8 = jj + (nnb << 1); + temp.r = work[i__8].r, temp.i = work[i__8].i; + i__8 = jj + (nnb << 1); + q__2.r = ctemp.r * temp.r - ctemp.i * temp.i, + q__2.i = ctemp.r * temp.i + ctemp.i * + temp.r; + i__9 = jj; + q__3.r = s.r * work[i__9].r - s.i * work[i__9].i, + q__3.i = s.r * work[i__9].i + s.i * work[ + i__9].r; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + work[i__8].r = q__1.r, work[i__8].i = q__1.i; + i__8 = jj; + r_cnjg(&q__3, &s); + q__2.r = q__3.r * temp.r - q__3.i * temp.i, + q__2.i = q__3.r * temp.i + q__3.i * + temp.r; + i__9 = jj; + q__4.r = ctemp.r * work[i__9].r - ctemp.i * work[ + i__9].i, q__4.i = ctemp.r * work[i__9].i + + ctemp.i * work[i__9].r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + + q__4.i; + work[i__8].r = q__1.r, work[i__8].i = q__1.i; + } + ++len; + ppw = ppw - (nnb << 1) - 1; + } + ppwo += (nnb << 2) * nnb; + } + +/* TOP denotes the number of top rows in A and B that will */ +/* not be updated during the next steps. */ + + if (jcol <= 2) { + top = 0; + } else { + top = jcol; + } + +/* Propagate transformations through B and replace stored */ +/* left sines/cosines by right sines/cosines. */ + + i__5 = j + 1; + for (jj = *n; jj >= i__5; --jj) { + +/* Update JJth column of B. */ + +/* Computing MIN */ + i__4 = jj + 1; + i__6 = j + 2; + for (i__ = f2cmin(i__4,*ihi); i__ >= i__6; --i__) { + i__4 = i__ + j * a_dim1; + ctemp.r = a[i__4].r, ctemp.i = a[i__4].i; + i__4 = i__ + j * b_dim1; + s.r = b[i__4].r, s.i = b[i__4].i; + i__4 = i__ + jj * b_dim1; + temp.r = b[i__4].r, temp.i = b[i__4].i; + i__4 = i__ + jj * b_dim1; + q__2.r = ctemp.r * temp.r - ctemp.i * temp.i, q__2.i = + ctemp.r * temp.i + ctemp.i * temp.r; + r_cnjg(&q__4, &s); + i__7 = i__ - 1 + jj * b_dim1; + q__3.r = q__4.r * b[i__7].r - q__4.i * b[i__7].i, + q__3.i = q__4.r * b[i__7].i + q__4.i * b[i__7] + .r; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; + i__4 = i__ - 1 + jj * b_dim1; + q__2.r = s.r * temp.r - s.i * temp.i, q__2.i = s.r * + temp.i + s.i * temp.r; + i__7 = i__ - 1 + jj * b_dim1; + q__3.r = ctemp.r * b[i__7].r - ctemp.i * b[i__7].i, + q__3.i = ctemp.r * b[i__7].i + ctemp.i * b[ + i__7].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; + } + +/* Annihilate B( JJ+1, JJ ). */ + + if (jj < *ihi) { + i__6 = jj + 1 + (jj + 1) * b_dim1; + temp.r = b[i__6].r, temp.i = b[i__6].i; + clartg_(&temp, &b[jj + 1 + jj * b_dim1], &c__, &s, &b[ + jj + 1 + (jj + 1) * b_dim1]); + i__6 = jj + 1 + jj * b_dim1; + b[i__6].r = 0.f, b[i__6].i = 0.f; + i__6 = jj - top; + crot_(&i__6, &b[top + 1 + (jj + 1) * b_dim1], &c__1, & + b[top + 1 + jj * b_dim1], &c__1, &c__, &s); + i__6 = jj + 1 + j * a_dim1; + q__1.r = c__, q__1.i = 0.f; + a[i__6].r = q__1.r, a[i__6].i = q__1.i; + i__6 = jj + 1 + j * b_dim1; + r_cnjg(&q__2, &s); + q__1.r = -q__2.r, q__1.i = -q__2.i; + b[i__6].r = q__1.r, b[i__6].i = q__1.i; + } + } + +/* Update A by transformations from right. */ + + jj = (*ihi - j - 1) % 3; + i__5 = jj + 1; + for (i__ = *ihi - j - 3; i__ >= i__5; i__ += -3) { + i__6 = j + 1 + i__ + j * a_dim1; + ctemp.r = a[i__6].r, ctemp.i = a[i__6].i; + i__6 = j + 1 + i__ + j * b_dim1; + q__1.r = -b[i__6].r, q__1.i = -b[i__6].i; + s.r = q__1.r, s.i = q__1.i; + i__6 = j + 2 + i__ + j * a_dim1; + c1.r = a[i__6].r, c1.i = a[i__6].i; + i__6 = j + 2 + i__ + j * b_dim1; + q__1.r = -b[i__6].r, q__1.i = -b[i__6].i; + s1.r = q__1.r, s1.i = q__1.i; + i__6 = j + 3 + i__ + j * a_dim1; + c2.r = a[i__6].r, c2.i = a[i__6].i; + i__6 = j + 3 + i__ + j * b_dim1; + q__1.r = -b[i__6].r, q__1.i = -b[i__6].i; + s2.r = q__1.r, s2.i = q__1.i; + + i__6 = *ihi; + for (k = top + 1; k <= i__6; ++k) { + i__4 = k + (j + i__) * a_dim1; + temp.r = a[i__4].r, temp.i = a[i__4].i; + i__4 = k + (j + i__ + 1) * a_dim1; + temp1.r = a[i__4].r, temp1.i = a[i__4].i; + i__4 = k + (j + i__ + 2) * a_dim1; + temp2.r = a[i__4].r, temp2.i = a[i__4].i; + i__4 = k + (j + i__ + 3) * a_dim1; + temp3.r = a[i__4].r, temp3.i = a[i__4].i; + i__4 = k + (j + i__ + 3) * a_dim1; + q__2.r = c2.r * temp3.r - c2.i * temp3.i, q__2.i = + c2.r * temp3.i + c2.i * temp3.r; + r_cnjg(&q__4, &s2); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, q__3.i = + q__4.r * temp2.i + q__4.i * temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + a[i__4].r = q__1.r, a[i__4].i = q__1.i; + q__3.r = -s2.r, q__3.i = -s2.i; + q__2.r = q__3.r * temp3.r - q__3.i * temp3.i, q__2.i = + q__3.r * temp3.i + q__3.i * temp3.r; + q__4.r = c2.r * temp2.r - c2.i * temp2.i, q__4.i = + c2.r * temp2.i + c2.i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + temp2.r = q__1.r, temp2.i = q__1.i; + i__4 = k + (j + i__ + 2) * a_dim1; + q__2.r = c1.r * temp2.r - c1.i * temp2.i, q__2.i = + c1.r * temp2.i + c1.i * temp2.r; + r_cnjg(&q__4, &s1); + q__3.r = q__4.r * temp1.r - q__4.i * temp1.i, q__3.i = + q__4.r * temp1.i + q__4.i * temp1.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + a[i__4].r = q__1.r, a[i__4].i = q__1.i; + q__3.r = -s1.r, q__3.i = -s1.i; + q__2.r = q__3.r * temp2.r - q__3.i * temp2.i, q__2.i = + q__3.r * temp2.i + q__3.i * temp2.r; + q__4.r = c1.r * temp1.r - c1.i * temp1.i, q__4.i = + c1.r * temp1.i + c1.i * temp1.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + temp1.r = q__1.r, temp1.i = q__1.i; + i__4 = k + (j + i__ + 1) * a_dim1; + q__2.r = ctemp.r * temp1.r - ctemp.i * temp1.i, + q__2.i = ctemp.r * temp1.i + ctemp.i * + temp1.r; + r_cnjg(&q__4, &s); + q__3.r = q__4.r * temp.r - q__4.i * temp.i, q__3.i = + q__4.r * temp.i + q__4.i * temp.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + a[i__4].r = q__1.r, a[i__4].i = q__1.i; + i__4 = k + (j + i__) * a_dim1; + q__3.r = -s.r, q__3.i = -s.i; + q__2.r = q__3.r * temp1.r - q__3.i * temp1.i, q__2.i = + q__3.r * temp1.i + q__3.i * temp1.r; + q__4.r = ctemp.r * temp.r - ctemp.i * temp.i, q__4.i = + ctemp.r * temp.i + ctemp.i * temp.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + a[i__4].r = q__1.r, a[i__4].i = q__1.i; + } + } + + if (jj > 0) { + for (i__ = jj; i__ >= 1; --i__) { + i__5 = j + 1 + i__ + j * a_dim1; + c__ = (doublereal) a[i__5].r; + i__5 = *ihi - top; + r_cnjg(&q__2, &b[j + 1 + i__ + j * b_dim1]); + q__1.r = -q__2.r, q__1.i = -q__2.i; + crot_(&i__5, &a[top + 1 + (j + i__ + 1) * a_dim1], & + c__1, &a[top + 1 + (j + i__) * a_dim1], &c__1, + &c__, &q__1); + } + } + +/* Update (J+1)th column of A by transformations from left. */ + + if (j < jcol + nnb - 1) { + len = j + 1 - jcol; + +/* Multiply with the trailing accumulated unitary */ +/* matrix, which takes the form */ + +/* [ U11 U12 ] */ +/* U = [ ], */ +/* [ U21 U22 ] */ + +/* where U21 is a LEN-by-LEN matrix and U12 is lower */ +/* triangular. */ + + jrow = *ihi - nblst + 1; + cgemv_("Conjugate", &nblst, &len, &c_b1, &work[1], &nblst, + &a[jrow + (j + 1) * a_dim1], &c__1, &c_b2, &work[ + pw], &c__1); + ppw = pw + len; + i__5 = jrow + nblst - len - 1; + for (i__ = jrow; i__ <= i__5; ++i__) { + i__6 = ppw; + i__4 = i__ + (j + 1) * a_dim1; + work[i__6].r = a[i__4].r, work[i__6].i = a[i__4].i; + ++ppw; + } + i__5 = nblst - len; + ctrmv_("Lower", "Conjugate", "Non-unit", &i__5, &work[len + * nblst + 1], &nblst, &work[pw + len], &c__1); + i__5 = nblst - len; + cgemv_("Conjugate", &len, &i__5, &c_b1, &work[(len + 1) * + nblst - len + 1], &nblst, &a[jrow + nblst - len + + (j + 1) * a_dim1], &c__1, &c_b1, &work[pw + len], + &c__1); + ppw = pw; + i__5 = jrow + nblst - 1; + for (i__ = jrow; i__ <= i__5; ++i__) { + i__6 = i__ + (j + 1) * a_dim1; + i__4 = ppw; + a[i__6].r = work[i__4].r, a[i__6].i = work[i__4].i; + ++ppw; + } + +/* Multiply with the other accumulated unitary */ +/* matrices, which take the form */ + +/* [ U11 U12 0 ] */ +/* [ ] */ +/* U = [ U21 U22 0 ], */ +/* [ ] */ +/* [ 0 0 I ] */ + +/* where I denotes the (NNB-LEN)-by-(NNB-LEN) identity */ +/* matrix, U21 is a LEN-by-LEN upper triangular matrix */ +/* and U12 is an NNB-by-NNB lower triangular matrix. */ + + ppwo = nblst * nblst + 1; + j0 = jrow - nnb; + i__5 = jcol + 1; + i__6 = -nnb; + for (jrow = j0; i__6 < 0 ? jrow >= i__5 : jrow <= i__5; + jrow += i__6) { + ppw = pw + len; + i__4 = jrow + nnb - 1; + for (i__ = jrow; i__ <= i__4; ++i__) { + i__7 = ppw; + i__8 = i__ + (j + 1) * a_dim1; + work[i__7].r = a[i__8].r, work[i__7].i = a[i__8] + .i; + ++ppw; + } + ppw = pw; + i__4 = jrow + nnb + len - 1; + for (i__ = jrow + nnb; i__ <= i__4; ++i__) { + i__7 = ppw; + i__8 = i__ + (j + 1) * a_dim1; + work[i__7].r = a[i__8].r, work[i__7].i = a[i__8] + .i; + ++ppw; + } + i__4 = nnb << 1; + ctrmv_("Upper", "Conjugate", "Non-unit", &len, &work[ + ppwo + nnb], &i__4, &work[pw], &c__1); + i__4 = nnb << 1; + ctrmv_("Lower", "Conjugate", "Non-unit", &nnb, &work[ + ppwo + (len << 1) * nnb], &i__4, &work[pw + + len], &c__1); + i__4 = nnb << 1; + cgemv_("Conjugate", &nnb, &len, &c_b1, &work[ppwo], & + i__4, &a[jrow + (j + 1) * a_dim1], &c__1, & + c_b1, &work[pw], &c__1); + i__4 = nnb << 1; + cgemv_("Conjugate", &len, &nnb, &c_b1, &work[ppwo + ( + len << 1) * nnb + nnb], &i__4, &a[jrow + nnb + + (j + 1) * a_dim1], &c__1, &c_b1, &work[pw + + len], &c__1); + ppw = pw; + i__4 = jrow + len + nnb - 1; + for (i__ = jrow; i__ <= i__4; ++i__) { + i__7 = i__ + (j + 1) * a_dim1; + i__8 = ppw; + a[i__7].r = work[i__8].r, a[i__7].i = work[i__8] + .i; + ++ppw; + } + ppwo += (nnb << 2) * nnb; + } + } + } + +/* Apply accumulated unitary matrices to A. */ + + cola = *n - jcol - nnb + 1; + j = *ihi - nblst + 1; + cgemm_("Conjugate", "No Transpose", &nblst, &cola, &nblst, &c_b1, + &work[1], &nblst, &a[j + (jcol + nnb) * a_dim1], lda, & + c_b2, &work[pw], &nblst); + clacpy_("All", &nblst, &cola, &work[pw], &nblst, &a[j + (jcol + + nnb) * a_dim1], lda); + ppwo = nblst * nblst + 1; + j0 = j - nnb; + i__3 = jcol + 1; + i__6 = -nnb; + for (j = j0; i__6 < 0 ? j >= i__3 : j <= i__3; j += i__6) { + if (blk22) { + +/* Exploit the structure of */ + +/* [ U11 U12 ] */ +/* U = [ ] */ +/* [ U21 U22 ], */ + +/* where all blocks are NNB-by-NNB, U21 is upper */ +/* triangular and U12 is lower triangular. */ + + i__5 = nnb << 1; + i__4 = nnb << 1; + i__7 = *lwork - pw + 1; + cunm22_("Left", "Conjugate", &i__5, &cola, &nnb, &nnb, & + work[ppwo], &i__4, &a[j + (jcol + nnb) * a_dim1], + lda, &work[pw], &i__7, &ierr); + } else { + +/* Ignore the structure of U. */ + + i__5 = nnb << 1; + i__4 = nnb << 1; + i__7 = nnb << 1; + i__8 = nnb << 1; + cgemm_("Conjugate", "No Transpose", &i__5, &cola, &i__4, & + c_b1, &work[ppwo], &i__7, &a[j + (jcol + nnb) * + a_dim1], lda, &c_b2, &work[pw], &i__8); + i__5 = nnb << 1; + i__4 = nnb << 1; + clacpy_("All", &i__5, &cola, &work[pw], &i__4, &a[j + ( + jcol + nnb) * a_dim1], lda); + } + ppwo += (nnb << 2) * nnb; + } + +/* Apply accumulated unitary matrices to Q. */ + + if (wantq) { + j = *ihi - nblst + 1; + if (initq) { +/* Computing MAX */ + i__6 = 2, i__3 = j - jcol + 1; + topq = f2cmax(i__6,i__3); + nh = *ihi - topq + 1; + } else { + topq = 1; + nh = *n; + } + cgemm_("No Transpose", "No Transpose", &nh, &nblst, &nblst, & + c_b1, &q[topq + j * q_dim1], ldq, &work[1], &nblst, & + c_b2, &work[pw], &nh); + clacpy_("All", &nh, &nblst, &work[pw], &nh, &q[topq + j * + q_dim1], ldq); + ppwo = nblst * nblst + 1; + j0 = j - nnb; + i__6 = jcol + 1; + i__3 = -nnb; + for (j = j0; i__3 < 0 ? j >= i__6 : j <= i__6; j += i__3) { + if (initq) { +/* Computing MAX */ + i__5 = 2, i__4 = j - jcol + 1; + topq = f2cmax(i__5,i__4); + nh = *ihi - topq + 1; + } + if (blk22) { + +/* Exploit the structure of U. */ + + i__5 = nnb << 1; + i__4 = nnb << 1; + i__7 = *lwork - pw + 1; + cunm22_("Right", "No Transpose", &nh, &i__5, &nnb, & + nnb, &work[ppwo], &i__4, &q[topq + j * q_dim1] + , ldq, &work[pw], &i__7, &ierr); + } else { + +/* Ignore the structure of U. */ + + i__5 = nnb << 1; + i__4 = nnb << 1; + i__7 = nnb << 1; + cgemm_("No Transpose", "No Transpose", &nh, &i__5, & + i__4, &c_b1, &q[topq + j * q_dim1], ldq, & + work[ppwo], &i__7, &c_b2, &work[pw], &nh); + i__5 = nnb << 1; + clacpy_("All", &nh, &i__5, &work[pw], &nh, &q[topq + + j * q_dim1], ldq); + } + ppwo += (nnb << 2) * nnb; + } + } + +/* Accumulate right Givens rotations if required. */ + + if (wantz || top > 0) { + +/* Initialize small unitary factors that will hold the */ +/* accumulated Givens rotations in workspace. */ + + claset_("All", &nblst, &nblst, &c_b2, &c_b1, &work[1], &nblst); + pw = nblst * nblst + 1; + i__3 = n2nb; + for (i__ = 1; i__ <= i__3; ++i__) { + i__6 = nnb << 1; + i__5 = nnb << 1; + i__4 = nnb << 1; + claset_("All", &i__6, &i__5, &c_b2, &c_b1, &work[pw], & + i__4); + pw += (nnb << 2) * nnb; + } + +/* Accumulate Givens rotations into workspace array. */ + + i__3 = jcol + nnb - 1; + for (j = jcol; j <= i__3; ++j) { + ppw = (nblst + 1) * (nblst - 2) - j + jcol + 1; + len = j + 2 - jcol; + jrow = j + n2nb * nnb + 2; + i__6 = jrow; + for (i__ = *ihi; i__ >= i__6; --i__) { + i__5 = i__ + j * a_dim1; + ctemp.r = a[i__5].r, ctemp.i = a[i__5].i; + i__5 = i__ + j * a_dim1; + a[i__5].r = 0.f, a[i__5].i = 0.f; + i__5 = i__ + j * b_dim1; + s.r = b[i__5].r, s.i = b[i__5].i; + i__5 = i__ + j * b_dim1; + b[i__5].r = 0.f, b[i__5].i = 0.f; + i__5 = ppw + len - 1; + for (jj = ppw; jj <= i__5; ++jj) { + i__4 = jj + nblst; + temp.r = work[i__4].r, temp.i = work[i__4].i; + i__4 = jj + nblst; + q__2.r = ctemp.r * temp.r - ctemp.i * temp.i, + q__2.i = ctemp.r * temp.i + ctemp.i * + temp.r; + r_cnjg(&q__4, &s); + i__7 = jj; + q__3.r = q__4.r * work[i__7].r - q__4.i * work[ + i__7].i, q__3.i = q__4.r * work[i__7].i + + q__4.i * work[i__7].r; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + work[i__4].r = q__1.r, work[i__4].i = q__1.i; + i__4 = jj; + q__2.r = s.r * temp.r - s.i * temp.i, q__2.i = + s.r * temp.i + s.i * temp.r; + i__7 = jj; + q__3.r = ctemp.r * work[i__7].r - ctemp.i * work[ + i__7].i, q__3.i = ctemp.r * work[i__7].i + + ctemp.i * work[i__7].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + work[i__4].r = q__1.r, work[i__4].i = q__1.i; + } + ++len; + ppw = ppw - nblst - 1; + } + + ppwo = nblst * nblst + (nnb + j - jcol - 1 << 1) * nnb + + nnb; + j0 = jrow - nnb; + i__6 = j + 2; + i__5 = -nnb; + for (jrow = j0; i__5 < 0 ? jrow >= i__6 : jrow <= i__6; + jrow += i__5) { + ppw = ppwo; + len = j + 2 - jcol; + i__4 = jrow; + for (i__ = jrow + nnb - 1; i__ >= i__4; --i__) { + i__7 = i__ + j * a_dim1; + ctemp.r = a[i__7].r, ctemp.i = a[i__7].i; + i__7 = i__ + j * a_dim1; + a[i__7].r = 0.f, a[i__7].i = 0.f; + i__7 = i__ + j * b_dim1; + s.r = b[i__7].r, s.i = b[i__7].i; + i__7 = i__ + j * b_dim1; + b[i__7].r = 0.f, b[i__7].i = 0.f; + i__7 = ppw + len - 1; + for (jj = ppw; jj <= i__7; ++jj) { + i__8 = jj + (nnb << 1); + temp.r = work[i__8].r, temp.i = work[i__8].i; + i__8 = jj + (nnb << 1); + q__2.r = ctemp.r * temp.r - ctemp.i * temp.i, + q__2.i = ctemp.r * temp.i + ctemp.i * + temp.r; + r_cnjg(&q__4, &s); + i__9 = jj; + q__3.r = q__4.r * work[i__9].r - q__4.i * + work[i__9].i, q__3.i = q__4.r * work[ + i__9].i + q__4.i * work[i__9].r; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + work[i__8].r = q__1.r, work[i__8].i = q__1.i; + i__8 = jj; + q__2.r = s.r * temp.r - s.i * temp.i, q__2.i = + s.r * temp.i + s.i * temp.r; + i__9 = jj; + q__3.r = ctemp.r * work[i__9].r - ctemp.i * + work[i__9].i, q__3.i = ctemp.r * work[ + i__9].i + ctemp.i * work[i__9].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + work[i__8].r = q__1.r, work[i__8].i = q__1.i; + } + ++len; + ppw = ppw - (nnb << 1) - 1; + } + ppwo += (nnb << 2) * nnb; + } + } + } else { + + i__3 = *ihi - jcol - 1; + claset_("Lower", &i__3, &nnb, &c_b2, &c_b2, &a[jcol + 2 + + jcol * a_dim1], lda); + i__3 = *ihi - jcol - 1; + claset_("Lower", &i__3, &nnb, &c_b2, &c_b2, &b[jcol + 2 + + jcol * b_dim1], ldb); + } + +/* Apply accumulated unitary matrices to A and B. */ + + if (top > 0) { + j = *ihi - nblst + 1; + cgemm_("No Transpose", "No Transpose", &top, &nblst, &nblst, & + c_b1, &a[j * a_dim1 + 1], lda, &work[1], &nblst, & + c_b2, &work[pw], &top); + clacpy_("All", &top, &nblst, &work[pw], &top, &a[j * a_dim1 + + 1], lda); + ppwo = nblst * nblst + 1; + j0 = j - nnb; + i__3 = jcol + 1; + i__5 = -nnb; + for (j = j0; i__5 < 0 ? j >= i__3 : j <= i__3; j += i__5) { + if (blk22) { + +/* Exploit the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = *lwork - pw + 1; + cunm22_("Right", "No Transpose", &top, &i__6, &nnb, & + nnb, &work[ppwo], &i__4, &a[j * a_dim1 + 1], + lda, &work[pw], &i__7, &ierr); + } else { + +/* Ignore the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = nnb << 1; + cgemm_("No Transpose", "No Transpose", &top, &i__6, & + i__4, &c_b1, &a[j * a_dim1 + 1], lda, &work[ + ppwo], &i__7, &c_b2, &work[pw], &top); + i__6 = nnb << 1; + clacpy_("All", &top, &i__6, &work[pw], &top, &a[j * + a_dim1 + 1], lda); + } + ppwo += (nnb << 2) * nnb; + } + + j = *ihi - nblst + 1; + cgemm_("No Transpose", "No Transpose", &top, &nblst, &nblst, & + c_b1, &b[j * b_dim1 + 1], ldb, &work[1], &nblst, & + c_b2, &work[pw], &top); + clacpy_("All", &top, &nblst, &work[pw], &top, &b[j * b_dim1 + + 1], ldb); + ppwo = nblst * nblst + 1; + j0 = j - nnb; + i__5 = jcol + 1; + i__3 = -nnb; + for (j = j0; i__3 < 0 ? j >= i__5 : j <= i__5; j += i__3) { + if (blk22) { + +/* Exploit the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = *lwork - pw + 1; + cunm22_("Right", "No Transpose", &top, &i__6, &nnb, & + nnb, &work[ppwo], &i__4, &b[j * b_dim1 + 1], + ldb, &work[pw], &i__7, &ierr); + } else { + +/* Ignore the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = nnb << 1; + cgemm_("No Transpose", "No Transpose", &top, &i__6, & + i__4, &c_b1, &b[j * b_dim1 + 1], ldb, &work[ + ppwo], &i__7, &c_b2, &work[pw], &top); + i__6 = nnb << 1; + clacpy_("All", &top, &i__6, &work[pw], &top, &b[j * + b_dim1 + 1], ldb); + } + ppwo += (nnb << 2) * nnb; + } + } + +/* Apply accumulated unitary matrices to Z. */ + + if (wantz) { + j = *ihi - nblst + 1; + if (initq) { +/* Computing MAX */ + i__3 = 2, i__5 = j - jcol + 1; + topq = f2cmax(i__3,i__5); + nh = *ihi - topq + 1; + } else { + topq = 1; + nh = *n; + } + cgemm_("No Transpose", "No Transpose", &nh, &nblst, &nblst, & + c_b1, &z__[topq + j * z_dim1], ldz, &work[1], &nblst, + &c_b2, &work[pw], &nh); + clacpy_("All", &nh, &nblst, &work[pw], &nh, &z__[topq + j * + z_dim1], ldz); + ppwo = nblst * nblst + 1; + j0 = j - nnb; + i__3 = jcol + 1; + i__5 = -nnb; + for (j = j0; i__5 < 0 ? j >= i__3 : j <= i__3; j += i__5) { + if (initq) { +/* Computing MAX */ + i__6 = 2, i__4 = j - jcol + 1; + topq = f2cmax(i__6,i__4); + nh = *ihi - topq + 1; + } + if (blk22) { + +/* Exploit the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = *lwork - pw + 1; + cunm22_("Right", "No Transpose", &nh, &i__6, &nnb, & + nnb, &work[ppwo], &i__4, &z__[topq + j * + z_dim1], ldz, &work[pw], &i__7, &ierr); + } else { + +/* Ignore the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = nnb << 1; + cgemm_("No Transpose", "No Transpose", &nh, &i__6, & + i__4, &c_b1, &z__[topq + j * z_dim1], ldz, & + work[ppwo], &i__7, &c_b2, &work[pw], &nh); + i__6 = nnb << 1; + clacpy_("All", &nh, &i__6, &work[pw], &nh, &z__[topq + + j * z_dim1], ldz); + } + ppwo += (nnb << 2) * nnb; + } + } + } + } + +/* Use unblocked code to reduce the rest of the matrix */ +/* Avoid re-initialization of modified Q and Z. */ + + *(unsigned char *)compq2 = *(unsigned char *)compq; + *(unsigned char *)compz2 = *(unsigned char *)compz; + if (jcol != *ilo) { + if (wantq) { + *(unsigned char *)compq2 = 'V'; + } + if (wantz) { + *(unsigned char *)compz2 = 'V'; + } + } + + if (jcol < *ihi) { + cgghrd_(compq2, compz2, n, &jcol, ihi, &a[a_offset], lda, &b[b_offset] + , ldb, &q[q_offset], ldq, &z__[z_offset], ldz, &ierr); + } + q__1.r = (real) lwkopt, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + + return 0; + +/* End of CGGHD3 */ + +} /* cgghd3_ */ + diff --git a/lapack-netlib/SRC/cgghrd.c b/lapack-netlib/SRC/cgghrd.c new file mode 100644 index 000000000..bbacf4c88 --- /dev/null +++ b/lapack-netlib/SRC/cgghrd.c @@ -0,0 +1,788 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CGGHRD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGGHRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, */ +/* LDQ, Z, LDZ, INFO ) */ + +/* CHARACTER COMPQ, COMPZ */ +/* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N */ +/* COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */ +/* $ Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGGHRD reduces a pair of complex matrices (A,B) to generalized upper */ +/* > Hessenberg form using unitary transformations, where A is a */ +/* > general matrix and B is upper triangular. The form of the generalized */ +/* > eigenvalue problem is */ +/* > A*x = lambda*B*x, */ +/* > and B is typically made upper triangular by computing its QR */ +/* > factorization and moving the unitary matrix Q to the left side */ +/* > of the equation. */ +/* > */ +/* > This subroutine simultaneously reduces A to a Hessenberg matrix H: */ +/* > Q**H*A*Z = H */ +/* > and transforms B to another upper triangular matrix T: */ +/* > Q**H*B*Z = T */ +/* > in order to reduce the problem to its standard form */ +/* > H*y = lambda*T*y */ +/* > where y = Z**H*x. */ +/* > */ +/* > The unitary matrices Q and Z are determined as products of Givens */ +/* > rotations. They may either be formed explicitly, or they may be */ +/* > postmultiplied into input matrices Q1 and Z1, so that */ +/* > Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H */ +/* > Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H */ +/* > If Q1 is the unitary matrix from the QR factorization of B in the */ +/* > original equation A*x = lambda*B*x, then CGGHRD reduces the original */ +/* > problem to generalized Hessenberg form. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] COMPQ */ +/* > \verbatim */ +/* > COMPQ is CHARACTER*1 */ +/* > = 'N': do not compute Q; */ +/* > = 'I': Q is initialized to the unit matrix, and the */ +/* > unitary matrix Q is returned; */ +/* > = 'V': Q must contain a unitary matrix Q1 on entry, */ +/* > and the product Q1*Q is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] COMPZ */ +/* > \verbatim */ +/* > COMPZ is CHARACTER*1 */ +/* > = 'N': do not compute Z; */ +/* > = 'I': Z is initialized to the unit matrix, and the */ +/* > unitary matrix Z is returned; */ +/* > = 'V': Z must contain a unitary matrix Z1 on entry, */ +/* > and the product Z1*Z is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > */ +/* > ILO and IHI mark the rows and columns of A which are to be */ +/* > reduced. It is assumed that A is already upper triangular */ +/* > in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are */ +/* > normally set by a previous call to CGGBAL; otherwise they */ +/* > should be set to 1 and N respectively. */ +/* > 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the N-by-N general matrix to be reduced. */ +/* > On exit, the upper triangle and the first subdiagonal of A */ +/* > are overwritten with the upper Hessenberg matrix H, and the */ +/* > rest is set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB, N) */ +/* > On entry, the N-by-N upper triangular matrix B. */ +/* > On exit, the upper triangular matrix T = Q**H B Z. The */ +/* > elements below the diagonal are set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX array, dimension (LDQ, N) */ +/* > On entry, if COMPQ = 'V', the unitary matrix Q1, typically */ +/* > from the QR factorization of B. */ +/* > On exit, if COMPQ='I', the unitary matrix Q, and if */ +/* > COMPQ = 'V', the product Q1*Q. */ +/* > Not referenced if COMPQ='N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. */ +/* > LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX array, dimension (LDZ, N) */ +/* > On entry, if COMPZ = 'V', the unitary matrix Z1. */ +/* > On exit, if COMPZ='I', the unitary matrix Z, and if */ +/* > COMPZ = 'V', the product Z1*Z. */ +/* > Not referenced if COMPZ='N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. */ +/* > LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > This routine reduces A to Hessenberg and B to triangular form by */ +/* > an unblocked reduction, as described in _Matrix_Computations_, */ +/* > by Golub and van Loan (Johns Hopkins Press). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int cgghrd_(char *compq, char *compz, integer *n, integer * + ilo, integer *ihi, complex *a, integer *lda, complex *b, integer *ldb, + complex *q, integer *ldq, complex *z__, integer *ldz, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, + z_offset, i__1, i__2, i__3; + complex q__1; + + /* Local variables */ + integer jcol; + extern /* Subroutine */ int crot_(integer *, complex *, integer *, + complex *, integer *, real *, complex *); + integer jrow; + real c__; + complex s; + extern logical lsame_(char *, char *); + complex ctemp; + extern /* Subroutine */ int claset_(char *, integer *, integer *, complex + *, complex *, complex *, integer *), clartg_(complex *, + complex *, real *, complex *, complex *), xerbla_(char *, integer + *, ftnlen); + integer icompq, icompz; + logical ilq, ilz; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Decode COMPQ */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + + /* Function Body */ + if (lsame_(compq, "N")) { + ilq = FALSE_; + icompq = 1; + } else if (lsame_(compq, "V")) { + ilq = TRUE_; + icompq = 2; + } else if (lsame_(compq, "I")) { + ilq = TRUE_; + icompq = 3; + } else { + icompq = 0; + } + +/* Decode COMPZ */ + + if (lsame_(compz, "N")) { + ilz = FALSE_; + icompz = 1; + } else if (lsame_(compz, "V")) { + ilz = TRUE_; + icompz = 2; + } else if (lsame_(compz, "I")) { + ilz = TRUE_; + icompz = 3; + } else { + icompz = 0; + } + +/* Test the input parameters. */ + + *info = 0; + if (icompq <= 0) { + *info = -1; + } else if (icompz <= 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1) { + *info = -4; + } else if (*ihi > *n || *ihi < *ilo - 1) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (ilq && *ldq < *n || *ldq < 1) { + *info = -11; + } else if (ilz && *ldz < *n || *ldz < 1) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGGHRD", &i__1, (ftnlen)6); + return 0; + } + +/* Initialize Q and Z if desired. */ + + if (icompq == 3) { + claset_("Full", n, n, &c_b2, &c_b1, &q[q_offset], ldq); + } + if (icompz == 3) { + claset_("Full", n, n, &c_b2, &c_b1, &z__[z_offset], ldz); + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + +/* Zero out lower triangle of B */ + + i__1 = *n - 1; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = jcol + 1; jrow <= i__2; ++jrow) { + i__3 = jrow + jcol * b_dim1; + b[i__3].r = 0.f, b[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + +/* Reduce A and B */ + + i__1 = *ihi - 2; + for (jcol = *ilo; jcol <= i__1; ++jcol) { + + i__2 = jcol + 2; + for (jrow = *ihi; jrow >= i__2; --jrow) { + +/* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) */ + + i__3 = jrow - 1 + jcol * a_dim1; + ctemp.r = a[i__3].r, ctemp.i = a[i__3].i; + clartg_(&ctemp, &a[jrow + jcol * a_dim1], &c__, &s, &a[jrow - 1 + + jcol * a_dim1]); + i__3 = jrow + jcol * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; + i__3 = *n - jcol; + crot_(&i__3, &a[jrow - 1 + (jcol + 1) * a_dim1], lda, &a[jrow + ( + jcol + 1) * a_dim1], lda, &c__, &s); + i__3 = *n + 2 - jrow; + crot_(&i__3, &b[jrow - 1 + (jrow - 1) * b_dim1], ldb, &b[jrow + ( + jrow - 1) * b_dim1], ldb, &c__, &s); + if (ilq) { + r_cnjg(&q__1, &s); + crot_(n, &q[(jrow - 1) * q_dim1 + 1], &c__1, &q[jrow * q_dim1 + + 1], &c__1, &c__, &q__1); + } + +/* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) */ + + i__3 = jrow + jrow * b_dim1; + ctemp.r = b[i__3].r, ctemp.i = b[i__3].i; + clartg_(&ctemp, &b[jrow + (jrow - 1) * b_dim1], &c__, &s, &b[jrow + + jrow * b_dim1]); + i__3 = jrow + (jrow - 1) * b_dim1; + b[i__3].r = 0.f, b[i__3].i = 0.f; + crot_(ihi, &a[jrow * a_dim1 + 1], &c__1, &a[(jrow - 1) * a_dim1 + + 1], &c__1, &c__, &s); + i__3 = jrow - 1; + crot_(&i__3, &b[jrow * b_dim1 + 1], &c__1, &b[(jrow - 1) * b_dim1 + + 1], &c__1, &c__, &s); + if (ilz) { + crot_(n, &z__[jrow * z_dim1 + 1], &c__1, &z__[(jrow - 1) * + z_dim1 + 1], &c__1, &c__, &s); + } +/* L30: */ + } +/* L40: */ + } + + return 0; + +/* End of CGGHRD */ + +} /* cgghrd_ */ + diff --git a/lapack-netlib/SRC/cgglse.c b/lapack-netlib/SRC/cgglse.c new file mode 100644 index 000000000..78c86d2a5 --- /dev/null +++ b/lapack-netlib/SRC/cgglse.c @@ -0,0 +1,794 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CGGLSE solves overdetermined or underdetermined systems for OTHER matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGGLSE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, P */ +/* COMPLEX A( LDA, * ), B( LDB, * ), C( * ), D( * ), */ +/* $ WORK( * ), X( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGGLSE solves the linear equality-constrained least squares (LSE) */ +/* > problem: */ +/* > */ +/* > minimize || c - A*x ||_2 subject to B*x = d */ +/* > */ +/* > where A is an M-by-N matrix, B is a P-by-N matrix, c is a given */ +/* > M-vector, and d is a given P-vector. It is assumed that */ +/* > P <= N <= M+P, and */ +/* > */ +/* > rank(B) = P and rank( (A) ) = N. */ +/* > ( (B) ) */ +/* > */ +/* > These conditions ensure that the LSE problem has a unique solution, */ +/* > which is obtained using a generalized RQ factorization of the */ +/* > matrices (B, A) given by */ +/* > */ +/* > B = (0 R)*Q, A = Z*T*Q. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of rows of the matrix B. 0 <= P <= N <= M+P. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(M,N)-by-N upper trapezoidal matrix T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,N) */ +/* > On entry, the P-by-N matrix B. */ +/* > On exit, the upper triangle of the subarray B(1:P,N-P+1:N) */ +/* > contains the P-by-P upper triangular matrix R. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,P). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX array, dimension (M) */ +/* > On entry, C contains the right hand side vector for the */ +/* > least squares part of the LSE problem. */ +/* > On exit, the residual sum of squares for the solution */ +/* > is given by the sum of squares of elements N-P+1 to M of */ +/* > vector C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is COMPLEX array, dimension (P) */ +/* > On entry, D contains the right hand side vector for the */ +/* > constrained equation. */ +/* > On exit, D is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is COMPLEX array, dimension (N) */ +/* > On exit, X is the solution of the LSE problem. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,M+N+P). */ +/* > For optimum performance LWORK >= P+f2cmin(M,N)+f2cmax(M,N)*NB, */ +/* > where NB is an upper bound for the optimal blocksizes for */ +/* > CGEQRF, CGERQF, CUNMQR and CUNMRQ. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > = 1: the upper triangular factor R associated with B in the */ +/* > generalized RQ factorization of the pair (B, A) is */ +/* > singular, so that rank(B) < P; the least squares */ +/* > solution could not be computed. */ +/* > = 2: the (N-P) by (N-P) part of the upper trapezoidal factor */ +/* > T associated with A in the generalized RQ factorization */ +/* > of the pair (B, A) is singular, so that */ +/* > rank( (A) ) < N; the least squares solution could not */ +/* > ( (B) ) */ +/* > be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexOTHERsolve */ + +/* ===================================================================== */ +/* Subroutine */ int cgglse_(integer *m, integer *n, integer *p, complex *a, + integer *lda, complex *b, integer *ldb, complex *c__, complex *d__, + complex *x, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + complex q__1; + + /* Local variables */ + integer lopt; + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *), ccopy_(integer *, complex *, integer *, + complex *, integer *), caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *), ctrmv_(char *, char *, char *, + integer *, complex *, integer *, complex *, integer *); + integer nb, mn, nr; + extern /* Subroutine */ int cggrqf_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, complex *, + complex *, integer *, integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer lwkmin, nb1, nb2, nb3, nb4; + extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *), cunmrq_(char *, + char *, integer *, integer *, integer *, complex *, integer *, + complex *, complex *, integer *, complex *, integer *, integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int ctrtrs_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --c__; + --d__; + --x; + --work; + + /* Function Body */ + *info = 0; + mn = f2cmin(*m,*n); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*p < 0 || *p > *n || *p < *n - *m) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldb < f2cmax(1,*p)) { + *info = -7; + } + +/* Calculate workspace */ + + if (*info == 0) { + if (*n == 0) { + lwkmin = 1; + lwkopt = 1; + } else { + nb1 = ilaenv_(&c__1, "CGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nb2 = ilaenv_(&c__1, "CGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nb3 = ilaenv_(&c__1, "CUNMQR", " ", m, n, p, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb4 = ilaenv_(&c__1, "CUNMRQ", " ", m, n, p, &c_n1, (ftnlen)6, ( + ftnlen)1); +/* Computing MAX */ + i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3); + nb = f2cmax(i__1,nb4); + lwkmin = *m + *n + *p; + lwkopt = *p + mn + f2cmax(*m,*n) * nb; + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + + if (*lwork < lwkmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGGLSE", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Compute the GRQ factorization of matrices B and A: */ + +/* B*Q**H = ( 0 T12 ) P Z**H*A*Q**H = ( R11 R12 ) N-P */ +/* N-P P ( 0 R22 ) M+P-N */ +/* N-P P */ + +/* where T12 and R11 are upper triangular, and Q and Z are */ +/* unitary. */ + + i__1 = *lwork - *p - mn; + cggrqf_(p, m, n, &b[b_offset], ldb, &work[1], &a[a_offset], lda, &work[*p + + 1], &work[*p + mn + 1], &i__1, info); + i__1 = *p + mn + 1; + lopt = work[i__1].r; + +/* Update c = Z**H *c = ( c1 ) N-P */ +/* ( c2 ) M+P-N */ + + i__1 = f2cmax(1,*m); + i__2 = *lwork - *p - mn; + cunmqr_("Left", "Conjugate Transpose", m, &c__1, &mn, &a[a_offset], lda, & + work[*p + 1], &c__[1], &i__1, &work[*p + mn + 1], &i__2, info); +/* Computing MAX */ + i__3 = *p + mn + 1; + i__1 = lopt, i__2 = (integer) work[i__3].r; + lopt = f2cmax(i__1,i__2); + +/* Solve T12*x2 = d for x2 */ + + if (*p > 0) { + ctrtrs_("Upper", "No transpose", "Non-unit", p, &c__1, &b[(*n - *p + + 1) * b_dim1 + 1], ldb, &d__[1], p, info); + + if (*info > 0) { + *info = 1; + return 0; + } + +/* Put the solution in X */ + + ccopy_(p, &d__[1], &c__1, &x[*n - *p + 1], &c__1); + +/* Update c1 */ + + i__1 = *n - *p; + q__1.r = -1.f, q__1.i = 0.f; + cgemv_("No transpose", &i__1, p, &q__1, &a[(*n - *p + 1) * a_dim1 + 1] + , lda, &d__[1], &c__1, &c_b1, &c__[1], &c__1); + } + +/* Solve R11*x1 = c1 for x1 */ + + if (*n > *p) { + i__1 = *n - *p; + i__2 = *n - *p; + ctrtrs_("Upper", "No transpose", "Non-unit", &i__1, &c__1, &a[ + a_offset], lda, &c__[1], &i__2, info); + + if (*info > 0) { + *info = 2; + return 0; + } + +/* Put the solutions in X */ + + i__1 = *n - *p; + ccopy_(&i__1, &c__[1], &c__1, &x[1], &c__1); + } + +/* Compute the residual vector: */ + + if (*m < *n) { + nr = *m + *p - *n; + if (nr > 0) { + i__1 = *n - *m; + q__1.r = -1.f, q__1.i = 0.f; + cgemv_("No transpose", &nr, &i__1, &q__1, &a[*n - *p + 1 + (*m + + 1) * a_dim1], lda, &d__[nr + 1], &c__1, &c_b1, &c__[*n - * + p + 1], &c__1); + } + } else { + nr = *p; + } + if (nr > 0) { + ctrmv_("Upper", "No transpose", "Non unit", &nr, &a[*n - *p + 1 + (*n + - *p + 1) * a_dim1], lda, &d__[1], &c__1); + q__1.r = -1.f, q__1.i = 0.f; + caxpy_(&nr, &q__1, &d__[1], &c__1, &c__[*n - *p + 1], &c__1); + } + +/* Backward transformation x = Q**H*x */ + + i__1 = *lwork - *p - mn; + cunmrq_("Left", "Conjugate Transpose", n, &c__1, p, &b[b_offset], ldb, & + work[1], &x[1], n, &work[*p + mn + 1], &i__1, info); +/* Computing MAX */ + i__4 = *p + mn + 1; + i__2 = lopt, i__3 = (integer) work[i__4].r; + i__1 = *p + mn + f2cmax(i__2,i__3); + work[1].r = (real) i__1, work[1].i = 0.f; + + return 0; + +/* End of CGGLSE */ + +} /* cgglse_ */ + diff --git a/lapack-netlib/SRC/cggqrf.c b/lapack-netlib/SRC/cggqrf.c new file mode 100644 index 000000000..9774260c8 --- /dev/null +++ b/lapack-netlib/SRC/cggqrf.c @@ -0,0 +1,719 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CGGQRF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGGQRF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, */ +/* LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, P */ +/* COMPLEX A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), */ +/* $ WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGGQRF computes a generalized QR factorization of an N-by-M matrix A */ +/* > and an N-by-P matrix B: */ +/* > */ +/* > A = Q*R, B = Q*T*Z, */ +/* > */ +/* > where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix, */ +/* > and R and T assume one of the forms: */ +/* > */ +/* > if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, */ +/* > ( 0 ) N-M N M-N */ +/* > M */ +/* > */ +/* > where R11 is upper triangular, and */ +/* > */ +/* > if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, */ +/* > P-N N ( T21 ) P */ +/* > P */ +/* > */ +/* > where T12 or T21 is upper triangular. */ +/* > */ +/* > In particular, if B is square and nonsingular, the GQR factorization */ +/* > of A and B implicitly gives the QR factorization of inv(B)*A: */ +/* > */ +/* > inv(B)*A = Z**H * (inv(T)*R) */ +/* > */ +/* > where inv(B) denotes the inverse of the matrix B, and Z' denotes the */ +/* > conjugate transpose of matrix Z. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of rows of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of columns of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of columns of the matrix B. P >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,M) */ +/* > On entry, the N-by-M matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(N,M)-by-M upper trapezoidal matrix R (R is */ +/* > upper triangular if N >= M); the elements below the diagonal, */ +/* > with the array TAUA, represent the unitary matrix Q as a */ +/* > product of f2cmin(N,M) elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUA */ +/* > \verbatim */ +/* > TAUA is COMPLEX array, dimension (f2cmin(N,M)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the unitary matrix Q (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,P) */ +/* > On entry, the N-by-P matrix B. */ +/* > On exit, if N <= P, the upper triangle of the subarray */ +/* > B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */ +/* > if N > P, the elements on and above the (N-P)-th subdiagonal */ +/* > contain the N-by-P upper trapezoidal matrix T; the remaining */ +/* > elements, with the array TAUB, represent the unitary */ +/* > matrix Z as a product of elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUB */ +/* > \verbatim */ +/* > TAUB is COMPLEX array, dimension (f2cmin(N,P)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the unitary matrix Z (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N,M,P). */ +/* > For optimum performance LWORK >= f2cmax(N,M,P)*f2cmax(NB1,NB2,NB3), */ +/* > where NB1 is the optimal blocksize for the QR factorization */ +/* > of an N-by-M matrix, NB2 is the optimal blocksize for the */ +/* > RQ factorization of an N-by-P matrix, and NB3 is the optimal */ +/* > blocksize for a call of CUNMQR. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(n,m). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - taua * v * v**H */ +/* > */ +/* > where taua is a complex scalar, and v is a complex vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), */ +/* > and taua in TAUA(i). */ +/* > To form Q explicitly, use LAPACK subroutine CUNGQR. */ +/* > To use Q to update another matrix, use LAPACK subroutine CUNMQR. */ +/* > */ +/* > The matrix Z is represented as a product of elementary reflectors */ +/* > */ +/* > Z = H(1) H(2) . . . H(k), where k = f2cmin(n,p). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - taub * v * v**H */ +/* > */ +/* > where taub is a complex scalar, and v is a complex vector with */ +/* > v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in */ +/* > B(n-k+i,1:p-k+i-1), and taub in TAUB(i). */ +/* > To form Z explicitly, use LAPACK subroutine CUNGRQ. */ +/* > To use Z to update another matrix, use LAPACK subroutine CUNMRQ. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int cggqrf_(integer *n, integer *m, integer *p, complex *a, + integer *lda, complex *taua, complex *b, integer *ldb, complex *taub, + complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer lopt, nb; + extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *, integer *), cgerqf_( + integer *, integer *, complex *, integer *, complex *, complex *, + integer *, integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer nb1, nb2, nb3; + extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --taua; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --taub; + --work; + + /* Function Body */ + *info = 0; + nb1 = ilaenv_(&c__1, "CGEQRF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb2 = ilaenv_(&c__1, "CGERQF", " ", n, p, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb3 = ilaenv_(&c__1, "CUNMQR", " ", n, m, p, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = f2cmax(nb1,nb2); + nb = f2cmax(i__1,nb3); +/* Computing MAX */ + i__1 = f2cmax(*n,*m); + lwkopt = f2cmax(i__1,*p) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*p < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*n), i__1 = f2cmax(i__1,*m); + if (*lwork < f2cmax(i__1,*p) && ! lquery) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGGQRF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* QR factorization of N-by-M matrix A: A = Q*R */ + + cgeqrf_(n, m, &a[a_offset], lda, &taua[1], &work[1], lwork, info); + lopt = work[1].r; + +/* Update B := Q**H*B. */ + + i__1 = f2cmin(*n,*m); + cunmqr_("Left", "Conjugate Transpose", n, p, &i__1, &a[a_offset], lda, & + taua[1], &b[b_offset], ldb, &work[1], lwork, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[1].r; + lopt = f2cmax(i__1,i__2); + +/* RQ factorization of N-by-P matrix B: B = T*Z. */ + + cgerqf_(n, p, &b[b_offset], ldb, &taub[1], &work[1], lwork, info); +/* Computing MAX */ + i__2 = lopt, i__3 = (integer) work[1].r; + i__1 = f2cmax(i__2,i__3); + work[1].r = (real) i__1, work[1].i = 0.f; + + return 0; + +/* End of CGGQRF */ + +} /* cggqrf_ */ + diff --git a/lapack-netlib/SRC/cggrqf.c b/lapack-netlib/SRC/cggrqf.c new file mode 100644 index 000000000..f6ed5661a --- /dev/null +++ b/lapack-netlib/SRC/cggrqf.c @@ -0,0 +1,720 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CGGRQF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGGRQF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, */ +/* LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, P */ +/* COMPLEX A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), */ +/* $ WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGGRQF computes a generalized RQ factorization of an M-by-N matrix A */ +/* > and a P-by-N matrix B: */ +/* > */ +/* > A = R*Q, B = Z*T*Q, */ +/* > */ +/* > where Q is an N-by-N unitary matrix, Z is a P-by-P unitary */ +/* > matrix, and R and T assume one of the forms: */ +/* > */ +/* > if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, */ +/* > N-M M ( R21 ) N */ +/* > N */ +/* > */ +/* > where R12 or R21 is upper triangular, and */ +/* > */ +/* > if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, */ +/* > ( 0 ) P-N P N-P */ +/* > N */ +/* > */ +/* > where T11 is upper triangular. */ +/* > */ +/* > In particular, if B is square and nonsingular, the GRQ factorization */ +/* > of A and B implicitly gives the RQ factorization of A*inv(B): */ +/* > */ +/* > A*inv(B) = (R*inv(T))*Z**H */ +/* > */ +/* > where inv(B) denotes the inverse of the matrix B, and Z**H denotes the */ +/* > conjugate transpose of the matrix Z. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of rows of the matrix B. P >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, if M <= N, the upper triangle of the subarray */ +/* > A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; */ +/* > if M > N, the elements on and above the (M-N)-th subdiagonal */ +/* > contain the M-by-N upper trapezoidal matrix R; the remaining */ +/* > elements, with the array TAUA, represent the unitary */ +/* > matrix Q as a product of elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUA */ +/* > \verbatim */ +/* > TAUA is COMPLEX array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the unitary matrix Q (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,N) */ +/* > On entry, the P-by-N matrix B. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(P,N)-by-N upper trapezoidal matrix T (T is */ +/* > upper triangular if P >= N); the elements below the diagonal, */ +/* > with the array TAUB, represent the unitary matrix Z as a */ +/* > product of elementary reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,P). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUB */ +/* > \verbatim */ +/* > TAUB is COMPLEX array, dimension (f2cmin(P,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the unitary matrix Z (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N,M,P). */ +/* > For optimum performance LWORK >= f2cmax(N,M,P)*f2cmax(NB1,NB2,NB3), */ +/* > where NB1 is the optimal blocksize for the RQ factorization */ +/* > of an M-by-N matrix, NB2 is the optimal blocksize for the */ +/* > QR factorization of a P-by-N matrix, and NB3 is the optimal */ +/* > blocksize for a call of CUNMRQ. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO=-i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - taua * v * v**H */ +/* > */ +/* > where taua is a complex scalar, and v is a complex vector with */ +/* > v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */ +/* > A(m-k+i,1:n-k+i-1), and taua in TAUA(i). */ +/* > To form Q explicitly, use LAPACK subroutine CUNGRQ. */ +/* > To use Q to update another matrix, use LAPACK subroutine CUNMRQ. */ +/* > */ +/* > The matrix Z is represented as a product of elementary reflectors */ +/* > */ +/* > Z = H(1) H(2) . . . H(k), where k = f2cmin(p,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - taub * v * v**H */ +/* > */ +/* > where taub is a complex scalar, and v is a complex vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), */ +/* > and taub in TAUB(i). */ +/* > To form Z explicitly, use LAPACK subroutine CUNGQR. */ +/* > To use Z to update another matrix, use LAPACK subroutine CUNMQR. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int cggrqf_(integer *m, integer *p, integer *n, complex *a, + integer *lda, complex *taua, complex *b, integer *ldb, complex *taub, + complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer lopt, nb; + extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *, integer *), cgerqf_( + integer *, integer *, complex *, integer *, complex *, complex *, + integer *, integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer nb1, nb2, nb3; + extern /* Subroutine */ int cunmrq_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --taua; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --taub; + --work; + + /* Function Body */ + *info = 0; + nb1 = ilaenv_(&c__1, "CGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb2 = ilaenv_(&c__1, "CGEQRF", " ", p, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb3 = ilaenv_(&c__1, "CUNMRQ", " ", m, n, p, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = f2cmax(nb1,nb2); + nb = f2cmax(i__1,nb3); +/* Computing MAX */ + i__1 = f2cmax(*n,*m); + lwkopt = f2cmax(i__1,*p) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*p < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldb < f2cmax(1,*p)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m), i__1 = f2cmax(i__1,*p); + if (*lwork < f2cmax(i__1,*n) && ! lquery) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGGRQF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* RQ factorization of M-by-N matrix A: A = R*Q */ + + cgerqf_(m, n, &a[a_offset], lda, &taua[1], &work[1], lwork, info); + lopt = work[1].r; + +/* Update B := B*Q**H */ + + i__1 = f2cmin(*m,*n); +/* Computing MAX */ + i__2 = 1, i__3 = *m - *n + 1; + cunmrq_("Right", "Conjugate Transpose", p, n, &i__1, &a[f2cmax(i__2,i__3) + + a_dim1], lda, &taua[1], &b[b_offset], ldb, &work[1], lwork, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[1].r; + lopt = f2cmax(i__1,i__2); + +/* QR factorization of P-by-N matrix B: B = Z*T */ + + cgeqrf_(p, n, &b[b_offset], ldb, &taub[1], &work[1], lwork, info); +/* Computing MAX */ + i__2 = lopt, i__3 = (integer) work[1].r; + i__1 = f2cmax(i__2,i__3); + work[1].r = (real) i__1, work[1].i = 0.f; + + return 0; + +/* End of CGGRQF */ + +} /* cggrqf_ */ + diff --git a/lapack-netlib/SRC/cggsvd3.c b/lapack-netlib/SRC/cggsvd3.c new file mode 100644 index 000000000..0fedd43c8 --- /dev/null +++ b/lapack-netlib/SRC/cggsvd3.c @@ -0,0 +1,946 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGGSVD3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGGSVD3( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, */ +/* LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, */ +/* LWORK, RWORK, IWORK, INFO ) */ + +/* CHARACTER JOBQ, JOBU, JOBV */ +/* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK */ +/* INTEGER IWORK( * ) */ +/* REAL ALPHA( * ), BETA( * ), RWORK( * ) */ +/* COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */ +/* $ U( LDU, * ), V( LDV, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGGSVD3 computes the generalized singular value decomposition (GSVD) */ +/* > of an M-by-N complex matrix A and P-by-N complex matrix B: */ +/* > */ +/* > U**H*A*Q = D1*( 0 R ), V**H*B*Q = D2*( 0 R ) */ +/* > */ +/* > where U, V and Q are unitary matrices. */ +/* > Let K+L = the effective numerical rank of the */ +/* > matrix (A**H,B**H)**H, then R is a (K+L)-by-(K+L) nonsingular upper */ +/* > triangular matrix, D1 and D2 are M-by-(K+L) and P-by-(K+L) "diagonal" */ +/* > matrices and of the following structures, respectively: */ +/* > */ +/* > If M-K-L >= 0, */ +/* > */ +/* > K L */ +/* > D1 = K ( I 0 ) */ +/* > L ( 0 C ) */ +/* > M-K-L ( 0 0 ) */ +/* > */ +/* > K L */ +/* > D2 = L ( 0 S ) */ +/* > P-L ( 0 0 ) */ +/* > */ +/* > N-K-L K L */ +/* > ( 0 R ) = K ( 0 R11 R12 ) */ +/* > L ( 0 0 R22 ) */ +/* > */ +/* > where */ +/* > */ +/* > C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), */ +/* > S = diag( BETA(K+1), ... , BETA(K+L) ), */ +/* > C**2 + S**2 = I. */ +/* > */ +/* > R is stored in A(1:K+L,N-K-L+1:N) on exit. */ +/* > */ +/* > If M-K-L < 0, */ +/* > */ +/* > K M-K K+L-M */ +/* > D1 = K ( I 0 0 ) */ +/* > M-K ( 0 C 0 ) */ +/* > */ +/* > K M-K K+L-M */ +/* > D2 = M-K ( 0 S 0 ) */ +/* > K+L-M ( 0 0 I ) */ +/* > P-L ( 0 0 0 ) */ +/* > */ +/* > N-K-L K M-K K+L-M */ +/* > ( 0 R ) = K ( 0 R11 R12 R13 ) */ +/* > M-K ( 0 0 R22 R23 ) */ +/* > K+L-M ( 0 0 0 R33 ) */ +/* > */ +/* > where */ +/* > */ +/* > C = diag( ALPHA(K+1), ... , ALPHA(M) ), */ +/* > S = diag( BETA(K+1), ... , BETA(M) ), */ +/* > C**2 + S**2 = I. */ +/* > */ +/* > (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored */ +/* > ( 0 R22 R23 ) */ +/* > in B(M-K+1:L,N+M-K-L+1:N) on exit. */ +/* > */ +/* > The routine computes C, S, R, and optionally the unitary */ +/* > transformation matrices U, V and Q. */ +/* > */ +/* > In particular, if B is an N-by-N nonsingular matrix, then the GSVD of */ +/* > A and B implicitly gives the SVD of A*inv(B): */ +/* > A*inv(B) = U*(D1*inv(D2))*V**H. */ +/* > If ( A**H,B**H)**H has orthonormal columns, then the GSVD of A and B is also */ +/* > equal to the CS decomposition of A and B. Furthermore, the GSVD can */ +/* > be used to derive the solution of the eigenvalue problem: */ +/* > A**H*A x = lambda* B**H*B x. */ +/* > In some literature, the GSVD of A and B is presented in the form */ +/* > U**H*A*X = ( 0 D1 ), V**H*B*X = ( 0 D2 ) */ +/* > where U and V are orthogonal and X is nonsingular, and D1 and D2 are */ +/* > ``diagonal''. The former GSVD form can be converted to the latter */ +/* > form by taking the nonsingular matrix X as */ +/* > */ +/* > X = Q*( I 0 ) */ +/* > ( 0 inv(R) ) */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > = 'U': Unitary matrix U is computed; */ +/* > = 'N': U is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > = 'V': Unitary matrix V is computed; */ +/* > = 'N': V is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBQ */ +/* > \verbatim */ +/* > JOBQ is CHARACTER*1 */ +/* > = 'Q': Unitary matrix Q is computed; */ +/* > = 'N': Q is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of rows of the matrix B. P >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[out] L */ +/* > \verbatim */ +/* > L is INTEGER */ +/* > */ +/* > On exit, K and L specify the dimension of the subblocks */ +/* > described in Purpose. */ +/* > K + L = effective numerical rank of (A**H,B**H)**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, A contains the triangular matrix R, or part of R. */ +/* > See Purpose for details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,N) */ +/* > On entry, the P-by-N matrix B. */ +/* > On exit, B contains part of the triangular matrix R if */ +/* > M-K-L < 0. See Purpose for details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,P). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is REAL array, dimension (N) */ +/* > */ +/* > On exit, ALPHA and BETA contain the generalized singular */ +/* > value pairs of A and B; */ +/* > ALPHA(1:K) = 1, */ +/* > BETA(1:K) = 0, */ +/* > and if M-K-L >= 0, */ +/* > ALPHA(K+1:K+L) = C, */ +/* > BETA(K+1:K+L) = S, */ +/* > or if M-K-L < 0, */ +/* > ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 */ +/* > BETA(K+1:M) =S, BETA(M+1:K+L) =1 */ +/* > and */ +/* > ALPHA(K+L+1:N) = 0 */ +/* > BETA(K+L+1:N) = 0 */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is COMPLEX array, dimension (LDU,M) */ +/* > If JOBU = 'U', U contains the M-by-M unitary matrix U. */ +/* > If JOBU = 'N', U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= f2cmax(1,M) if */ +/* > JOBU = 'U'; LDU >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] V */ +/* > \verbatim */ +/* > V is COMPLEX array, dimension (LDV,P) */ +/* > If JOBV = 'V', V contains the P-by-P unitary matrix V. */ +/* > If JOBV = 'N', V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. LDV >= f2cmax(1,P) if */ +/* > JOBV = 'V'; LDV >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX array, dimension (LDQ,N) */ +/* > If JOBQ = 'Q', Q contains the N-by-N unitary matrix Q. */ +/* > If JOBQ = 'N', Q is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. LDQ >= f2cmax(1,N) if */ +/* > JOBQ = 'Q'; LDQ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > On exit, IWORK stores the sorting information. More */ +/* > precisely, the following loop will sort ALPHA */ +/* > for I = K+1, f2cmin(M,K+L) */ +/* > swap ALPHA(I) and ALPHA(IWORK(I)) */ +/* > endfor */ +/* > such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = 1, the Jacobi-type procedure failed to */ +/* > converge. For further details, see subroutine CTGSJA. */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > TOLA REAL */ +/* > TOLB REAL */ +/* > TOLA and TOLB are the thresholds to determine the effective */ +/* > rank of (A**H,B**H)**H. Generally, they are set to */ +/* > TOLA = MAX(M,N)*norm(A)*MACHEPS, */ +/* > TOLB = MAX(P,N)*norm(B)*MACHEPS. */ +/* > The size of TOLA and TOLB may affect the size of backward */ +/* > errors of the decomposition. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date August 2015 */ + +/* > \ingroup complexGEsing */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Huan Ren, Computer Science Division, University of */ +/* > California at Berkeley, USA */ +/* > */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > CGGSVD3 replaces the deprecated subroutine CGGSVD. */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int cggsvd3_(char *jobu, char *jobv, char *jobq, integer *m, + integer *n, integer *p, integer *k, integer *l, complex *a, integer * + lda, complex *b, integer *ldb, real *alpha, real *beta, complex *u, + integer *ldu, complex *v, integer *ldv, complex *q, integer *ldq, + complex *work, integer *lwork, real *rwork, integer *iwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, + u_offset, v_dim1, v_offset, i__1, i__2; + complex q__1; + + /* Local variables */ + integer ibnd; + real tola; + integer isub; + real tolb, unfl, temp, smax; + integer ncallmycycle, i__, j; + extern logical lsame_(char *, char *); + real anorm, bnorm; + logical wantq; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + logical wantu, wantv; + extern real clange_(char *, integer *, integer *, complex *, integer *, + real *), slamch_(char *); + extern /* Subroutine */ int ctgsja_(char *, char *, char *, integer *, + integer *, integer *, integer *, integer *, complex *, integer *, + complex *, integer *, real *, real *, real *, real *, complex *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *, integer *), xerbla_(char *, + integer *, ftnlen); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int cggsvp3_(char *, char *, char *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + real *, real *, integer *, integer *, complex *, integer *, + complex *, integer *, complex *, integer *, integer *, real *, + complex *, complex *, integer *, integer *); + real ulp; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* August 2015 */ + + +/* ===================================================================== */ + + +/* Decode and test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alpha; + --beta; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantu = lsame_(jobu, "U"); + wantv = lsame_(jobv, "V"); + wantq = lsame_(jobq, "Q"); + lquery = *lwork == -1; + lwkopt = 1; + +/* Test the input arguments */ + + *info = 0; + if (! (wantu || lsame_(jobu, "N"))) { + *info = -1; + } else if (! (wantv || lsame_(jobv, "N"))) { + *info = -2; + } else if (! (wantq || lsame_(jobq, "N"))) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*p < 0) { + *info = -6; + } else if (*lda < f2cmax(1,*m)) { + *info = -10; + } else if (*ldb < f2cmax(1,*p)) { + *info = -12; + } else if (*ldu < 1 || wantu && *ldu < *m) { + *info = -16; + } else if (*ldv < 1 || wantv && *ldv < *p) { + *info = -18; + } else if (*ldq < 1 || wantq && *ldq < *n) { + *info = -20; + } else if (*lwork < 1 && ! lquery) { + *info = -24; + } + +/* Compute workspace */ + + if (*info == 0) { + cggsvp3_(jobu, jobv, jobq, m, p, n, &a[a_offset], lda, &b[b_offset], + ldb, &tola, &tolb, k, l, &u[u_offset], ldu, &v[v_offset], ldv, + &q[q_offset], ldq, &iwork[1], &rwork[1], &work[1], &work[1], + &c_n1, info); + lwkopt = *n + (integer) work[1].r; +/* Computing MAX */ + i__1 = *n << 1; + lwkopt = f2cmax(i__1,lwkopt); + lwkopt = f2cmax(1,lwkopt); + q__1.r = (real) lwkopt, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGGSVD3", &i__1, (ftnlen)7); + return 0; + } + if (lquery) { + return 0; + } + +/* Compute the Frobenius norm of matrices A and B */ + + anorm = clange_("1", m, n, &a[a_offset], lda, &rwork[1]); + bnorm = clange_("1", p, n, &b[b_offset], ldb, &rwork[1]); + +/* Get machine precision and set up threshold for determining */ +/* the effective numerical rank of the matrices A and B. */ + + ulp = slamch_("Precision"); + unfl = slamch_("Safe Minimum"); + tola = f2cmax(*m,*n) * f2cmax(anorm,unfl) * ulp; + tolb = f2cmax(*p,*n) * f2cmax(bnorm,unfl) * ulp; + + i__1 = *lwork - *n; + cggsvp3_(jobu, jobv, jobq, m, p, n, &a[a_offset], lda, &b[b_offset], ldb, + &tola, &tolb, k, l, &u[u_offset], ldu, &v[v_offset], ldv, &q[ + q_offset], ldq, &iwork[1], &rwork[1], &work[1], &work[*n + 1], & + i__1, info); + +/* Compute the GSVD of two upper "triangular" matrices */ + + ctgsja_(jobu, jobv, jobq, m, p, n, k, l, &a[a_offset], lda, &b[b_offset], + ldb, &tola, &tolb, &alpha[1], &beta[1], &u[u_offset], ldu, &v[ + v_offset], ldv, &q[q_offset], ldq, &work[1], &ncallmycycle, info); + +/* Sort the singular values and store the pivot indices in IWORK */ +/* Copy ALPHA to RWORK, then sort ALPHA in RWORK */ + + scopy_(n, &alpha[1], &c__1, &rwork[1], &c__1); +/* Computing MIN */ + i__1 = *l, i__2 = *m - *k; + ibnd = f2cmin(i__1,i__2); + i__1 = ibnd; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for largest ALPHA(K+I) */ + + isub = i__; + smax = rwork[*k + i__]; + i__2 = ibnd; + for (j = i__ + 1; j <= i__2; ++j) { + temp = rwork[*k + j]; + if (temp > smax) { + isub = j; + smax = temp; + } +/* L10: */ + } + if (isub != i__) { + rwork[*k + isub] = rwork[*k + i__]; + rwork[*k + i__] = smax; + iwork[*k + i__] = *k + isub; + } else { + iwork[*k + i__] = *k + i__; + } +/* L20: */ + } + + q__1.r = (real) lwkopt, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + return 0; + +/* End of CGGSVD3 */ + +} /* cggsvd3_ */ + diff --git a/lapack-netlib/SRC/cggsvp3.c b/lapack-netlib/SRC/cggsvp3.c new file mode 100644 index 000000000..291ecfcf1 --- /dev/null +++ b/lapack-netlib/SRC/cggsvp3.c @@ -0,0 +1,1070 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CGGSVP3 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGGSVP3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, */ +/* TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, */ +/* IWORK, RWORK, TAU, WORK, LWORK, INFO ) */ + +/* CHARACTER JOBQ, JOBU, JOBV */ +/* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK */ +/* REAL TOLA, TOLB */ +/* INTEGER IWORK( * ) */ +/* REAL RWORK( * ) */ +/* COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */ +/* $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGGSVP3 computes unitary matrices U, V and Q such that */ +/* > */ +/* > N-K-L K L */ +/* > U**H*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; */ +/* > L ( 0 0 A23 ) */ +/* > M-K-L ( 0 0 0 ) */ +/* > */ +/* > N-K-L K L */ +/* > = K ( 0 A12 A13 ) if M-K-L < 0; */ +/* > M-K ( 0 0 A23 ) */ +/* > */ +/* > N-K-L K L */ +/* > V**H*B*Q = L ( 0 0 B13 ) */ +/* > P-L ( 0 0 0 ) */ +/* > */ +/* > where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular */ +/* > upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, */ +/* > otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective */ +/* > numerical rank of the (M+P)-by-N matrix (A**H,B**H)**H. */ +/* > */ +/* > This decomposition is the preprocessing step for computing the */ +/* > Generalized Singular Value Decomposition (GSVD), see subroutine */ +/* > CGGSVD3. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > = 'U': Unitary matrix U is computed; */ +/* > = 'N': U is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > = 'V': Unitary matrix V is computed; */ +/* > = 'N': V is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBQ */ +/* > \verbatim */ +/* > JOBQ is CHARACTER*1 */ +/* > = 'Q': Unitary matrix Q is computed; */ +/* > = 'N': Q is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of rows of the matrix B. P >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, A contains the triangular (or trapezoidal) matrix */ +/* > described in the Purpose section. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,N) */ +/* > On entry, the P-by-N matrix B. */ +/* > On exit, B contains the triangular matrix described in */ +/* > the Purpose section. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,P). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TOLA */ +/* > \verbatim */ +/* > TOLA is REAL */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TOLB */ +/* > \verbatim */ +/* > TOLB is REAL */ +/* > */ +/* > TOLA and TOLB are the thresholds to determine the effective */ +/* > numerical rank of matrix B and a subblock of A. Generally, */ +/* > they are set to */ +/* > TOLA = MAX(M,N)*norm(A)*MACHEPS, */ +/* > TOLB = MAX(P,N)*norm(B)*MACHEPS. */ +/* > The size of TOLA and TOLB may affect the size of backward */ +/* > errors of the decomposition. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[out] L */ +/* > \verbatim */ +/* > L is INTEGER */ +/* > */ +/* > On exit, K and L specify the dimension of the subblocks */ +/* > described in Purpose section. */ +/* > K + L = effective numerical rank of (A**H,B**H)**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is COMPLEX array, dimension (LDU,M) */ +/* > If JOBU = 'U', U contains the unitary matrix U. */ +/* > If JOBU = 'N', U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= f2cmax(1,M) if */ +/* > JOBU = 'U'; LDU >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] V */ +/* > \verbatim */ +/* > V is COMPLEX array, dimension (LDV,P) */ +/* > If JOBV = 'V', V contains the unitary matrix V. */ +/* > If JOBV = 'N', V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. LDV >= f2cmax(1,P) if */ +/* > JOBV = 'V'; LDV >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX array, dimension (LDQ,N) */ +/* > If JOBQ = 'Q', Q contains the unitary matrix Q. */ +/* > If JOBQ = 'N', Q is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. LDQ >= f2cmax(1,N) if */ +/* > JOBQ = 'Q'; LDQ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date August 2015 */ + +/* > \ingroup complexOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The subroutine uses LAPACK subroutine CGEQP3 for the QR factorization */ +/* > with column pivoting to detect the effective numerical rank of the */ +/* > a matrix. It may be replaced by a better rank determination strategy. */ +/* > */ +/* > CGGSVP3 replaces the deprecated subroutine CGGSVP. */ +/* > */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int cggsvp3_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, complex *a, integer *lda, complex *b, integer + *ldb, real *tola, real *tolb, integer *k, integer *l, complex *u, + integer *ldu, complex *v, integer *ldv, complex *q, integer *ldq, + integer *iwork, real *rwork, complex *tau, complex *work, integer * + lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, + u_offset, v_dim1, v_offset, i__1, i__2, i__3; + complex q__1; + + /* Local variables */ + integer i__, j; + extern logical lsame_(char *, char *); + logical wantq, wantu, wantv; + extern /* Subroutine */ int cgeqp3_(integer *, integer *, complex *, + integer *, integer *, complex *, complex *, integer *, real *, + integer *), cgeqr2_(integer *, integer *, complex *, integer *, + complex *, complex *, integer *), cgerq2_(integer *, integer *, + complex *, integer *, complex *, complex *, integer *), cung2r_( + integer *, integer *, integer *, complex *, integer *, complex *, + complex *, integer *), cunm2r_(char *, char *, integer *, integer + *, integer *, complex *, integer *, complex *, complex *, integer + *, complex *, integer *), cunmr2_(char *, char *, + integer *, integer *, integer *, complex *, integer *, complex *, + complex *, integer *, complex *, integer *), + clacpy_(char *, integer *, integer *, complex *, integer *, + complex *, integer *), claset_(char *, integer *, integer + *, complex *, complex *, complex *, integer *), xerbla_( + char *, integer *, ftnlen), clapmt_(logical *, integer *, integer + *, complex *, integer *, integer *); + logical forwrd; + integer lwkopt; + logical lquery; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* August 2015 */ + + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + --iwork; + --rwork; + --tau; + --work; + + /* Function Body */ + wantu = lsame_(jobu, "U"); + wantv = lsame_(jobv, "V"); + wantq = lsame_(jobq, "Q"); + forwrd = TRUE_; + lquery = *lwork == -1; + lwkopt = 1; + +/* Test the input arguments */ + + *info = 0; + if (! (wantu || lsame_(jobu, "N"))) { + *info = -1; + } else if (! (wantv || lsame_(jobv, "N"))) { + *info = -2; + } else if (! (wantq || lsame_(jobq, "N"))) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*p < 0) { + *info = -5; + } else if (*n < 0) { + *info = -6; + } else if (*lda < f2cmax(1,*m)) { + *info = -8; + } else if (*ldb < f2cmax(1,*p)) { + *info = -10; + } else if (*ldu < 1 || wantu && *ldu < *m) { + *info = -16; + } else if (*ldv < 1 || wantv && *ldv < *p) { + *info = -18; + } else if (*ldq < 1 || wantq && *ldq < *n) { + *info = -20; + } else if (*lwork < 1 && ! lquery) { + *info = -24; + } + +/* Compute workspace */ + + if (*info == 0) { + cgeqp3_(p, n, &b[b_offset], ldb, &iwork[1], &tau[1], &work[1], &c_n1, + &rwork[1], info); + lwkopt = (integer) work[1].r; + if (wantv) { + lwkopt = f2cmax(lwkopt,*p); + } +/* Computing MAX */ + i__1 = lwkopt, i__2 = f2cmin(*n,*p); + lwkopt = f2cmax(i__1,i__2); + lwkopt = f2cmax(lwkopt,*m); + if (wantq) { + lwkopt = f2cmax(lwkopt,*n); + } + cgeqp3_(m, n, &a[a_offset], lda, &iwork[1], &tau[1], &work[1], &c_n1, + &rwork[1], info); +/* Computing MAX */ + i__1 = lwkopt, i__2 = (integer) work[1].r; + lwkopt = f2cmax(i__1,i__2); + lwkopt = f2cmax(1,lwkopt); + q__1.r = (real) lwkopt, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGGSVP3", &i__1, (ftnlen)7); + return 0; + } + if (lquery) { + return 0; + } + +/* QR with column pivoting of B: B*P = V*( S11 S12 ) */ +/* ( 0 0 ) */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[i__] = 0; +/* L10: */ + } + cgeqp3_(p, n, &b[b_offset], ldb, &iwork[1], &tau[1], &work[1], lwork, & + rwork[1], info); + +/* Update A := A*P */ + + clapmt_(&forwrd, m, n, &a[a_offset], lda, &iwork[1]); + +/* Determine the effective rank of matrix B. */ + + *l = 0; + i__1 = f2cmin(*p,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + if (c_abs(&b[i__ + i__ * b_dim1]) > *tolb) { + ++(*l); + } +/* L20: */ + } + + if (wantv) { + +/* Copy the details of V, and form V. */ + + claset_("Full", p, p, &c_b1, &c_b1, &v[v_offset], ldv); + if (*p > 1) { + i__1 = *p - 1; + clacpy_("Lower", &i__1, n, &b[b_dim1 + 2], ldb, &v[v_dim1 + 2], + ldv); + } + i__1 = f2cmin(*p,*n); + cung2r_(p, p, &i__1, &v[v_offset], ldv, &tau[1], &work[1], info); + } + +/* Clean up B */ + + i__1 = *l - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *l; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0.f, b[i__3].i = 0.f; +/* L30: */ + } +/* L40: */ + } + if (*p > *l) { + i__1 = *p - *l; + claset_("Full", &i__1, n, &c_b1, &c_b1, &b[*l + 1 + b_dim1], ldb); + } + + if (wantq) { + +/* Set Q = I and Update Q := Q*P */ + + claset_("Full", n, n, &c_b1, &c_b2, &q[q_offset], ldq); + clapmt_(&forwrd, n, n, &q[q_offset], ldq, &iwork[1]); + } + + if (*p >= *l && *n != *l) { + +/* RQ factorization of ( S11 S12 ) = ( 0 S12 )*Z */ + + cgerq2_(l, n, &b[b_offset], ldb, &tau[1], &work[1], info); + +/* Update A := A*Z**H */ + + cunmr2_("Right", "Conjugate transpose", m, n, l, &b[b_offset], ldb, & + tau[1], &a[a_offset], lda, &work[1], info); + if (wantq) { + +/* Update Q := Q*Z**H */ + + cunmr2_("Right", "Conjugate transpose", n, n, l, &b[b_offset], + ldb, &tau[1], &q[q_offset], ldq, &work[1], info); + } + +/* Clean up B */ + + i__1 = *n - *l; + claset_("Full", l, &i__1, &c_b1, &c_b1, &b[b_offset], ldb); + i__1 = *n; + for (j = *n - *l + 1; j <= i__1; ++j) { + i__2 = *l; + for (i__ = j - *n + *l + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0.f, b[i__3].i = 0.f; +/* L50: */ + } +/* L60: */ + } + + } + +/* Let N-L L */ +/* A = ( A11 A12 ) M, */ + +/* then the following does the complete QR decomposition of A11: */ + +/* A11 = U*( 0 T12 )*P1**H */ +/* ( 0 0 ) */ + + i__1 = *n - *l; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[i__] = 0; +/* L70: */ + } + i__1 = *n - *l; + cgeqp3_(m, &i__1, &a[a_offset], lda, &iwork[1], &tau[1], &work[1], lwork, + &rwork[1], info); + +/* Determine the effective rank of A11 */ + + *k = 0; +/* Computing MIN */ + i__2 = *m, i__3 = *n - *l; + i__1 = f2cmin(i__2,i__3); + for (i__ = 1; i__ <= i__1; ++i__) { + if (c_abs(&a[i__ + i__ * a_dim1]) > *tola) { + ++(*k); + } +/* L80: */ + } + +/* Update A12 := U**H*A12, where A12 = A( 1:M, N-L+1:N ) */ + +/* Computing MIN */ + i__2 = *m, i__3 = *n - *l; + i__1 = f2cmin(i__2,i__3); + cunm2r_("Left", "Conjugate transpose", m, l, &i__1, &a[a_offset], lda, & + tau[1], &a[(*n - *l + 1) * a_dim1 + 1], lda, &work[1], info); + + if (wantu) { + +/* Copy the details of U, and form U */ + + claset_("Full", m, m, &c_b1, &c_b1, &u[u_offset], ldu); + if (*m > 1) { + i__1 = *m - 1; + i__2 = *n - *l; + clacpy_("Lower", &i__1, &i__2, &a[a_dim1 + 2], lda, &u[u_dim1 + 2] + , ldu); + } +/* Computing MIN */ + i__2 = *m, i__3 = *n - *l; + i__1 = f2cmin(i__2,i__3); + cung2r_(m, m, &i__1, &u[u_offset], ldu, &tau[1], &work[1], info); + } + + if (wantq) { + +/* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 */ + + i__1 = *n - *l; + clapmt_(&forwrd, n, &i__1, &q[q_offset], ldq, &iwork[1]); + } + +/* Clean up A: set the strictly lower triangular part of */ +/* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. */ + + i__1 = *k - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L90: */ + } +/* L100: */ + } + if (*m > *k) { + i__1 = *m - *k; + i__2 = *n - *l; + claset_("Full", &i__1, &i__2, &c_b1, &c_b1, &a[*k + 1 + a_dim1], lda); + } + + if (*n - *l > *k) { + +/* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 */ + + i__1 = *n - *l; + cgerq2_(k, &i__1, &a[a_offset], lda, &tau[1], &work[1], info); + + if (wantq) { + +/* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1**H */ + + i__1 = *n - *l; + cunmr2_("Right", "Conjugate transpose", n, &i__1, k, &a[a_offset], + lda, &tau[1], &q[q_offset], ldq, &work[1], info); + } + +/* Clean up A */ + + i__1 = *n - *l - *k; + claset_("Full", k, &i__1, &c_b1, &c_b1, &a[a_offset], lda); + i__1 = *n - *l; + for (j = *n - *l - *k + 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = j - *n + *l + *k + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L110: */ + } +/* L120: */ + } + + } + + if (*m > *k) { + +/* QR factorization of A( K+1:M,N-L+1:N ) */ + + i__1 = *m - *k; + cgeqr2_(&i__1, l, &a[*k + 1 + (*n - *l + 1) * a_dim1], lda, &tau[1], & + work[1], info); + + if (wantu) { + +/* Update U(:,K+1:M) := U(:,K+1:M)*U1 */ + + i__1 = *m - *k; +/* Computing MIN */ + i__3 = *m - *k; + i__2 = f2cmin(i__3,*l); + cunm2r_("Right", "No transpose", m, &i__1, &i__2, &a[*k + 1 + (*n + - *l + 1) * a_dim1], lda, &tau[1], &u[(*k + 1) * u_dim1 + + 1], ldu, &work[1], info); + } + +/* Clean up */ + + i__1 = *n; + for (j = *n - *l + 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j - *n + *k + *l + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L130: */ + } +/* L140: */ + } + + } + + q__1.r = (real) lwkopt, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + return 0; + +/* End of CGGSVP3 */ + +} /* cggsvp3_ */ + diff --git a/lapack-netlib/SRC/cgsvj0.c b/lapack-netlib/SRC/cgsvj0.c new file mode 100644 index 000000000..5e8b351c6 --- /dev/null +++ b/lapack-netlib/SRC/cgsvj0.c @@ -0,0 +1,1542 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CGSVJ0 pre-processor for the routine cgesvj. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGSVJ0 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, */ +/* SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP */ +/* REAL EPS, SFMIN, TOL */ +/* CHARACTER*1 JOBV */ +/* COMPLEX A( LDA, * ), D( N ), V( LDV, * ), WORK( LWORK ) */ +/* REAL SVA( N ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGSVJ0 is called from CGESVJ as a pre-processor and that is its main */ +/* > purpose. It applies Jacobi rotations in the same way as CGESVJ does, but */ +/* > it does not check convergence (stopping criterion). Few tuning */ +/* > parameters (marked by [TP]) are available for the implementer. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > Specifies whether the output from this procedure is used */ +/* > to compute the matrix V: */ +/* > = 'V': the product of the Jacobi rotations is accumulated */ +/* > by postmulyiplying the N-by-N array V. */ +/* > (See the description of V.) */ +/* > = 'A': the product of the Jacobi rotations is accumulated */ +/* > by postmulyiplying the MV-by-N array V. */ +/* > (See the descriptions of MV and V.) */ +/* > = 'N': the Jacobi rotations are not accumulated. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. */ +/* > M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, M-by-N matrix A, such that A*diag(D) represents */ +/* > the input matrix. */ +/* > On exit, */ +/* > A_onexit * diag(D_onexit) represents the input matrix A*diag(D) */ +/* > post-multiplied by a sequence of Jacobi rotations, where the */ +/* > rotation threshold and the total number of sweeps are given in */ +/* > TOL and NSWEEP, respectively. */ +/* > (See the descriptions of D, TOL and NSWEEP.) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is COMPLEX array, dimension (N) */ +/* > The array D accumulates the scaling factors from the complex scaled */ +/* > Jacobi rotations. */ +/* > On entry, A*diag(D) represents the input matrix. */ +/* > On exit, A_onexit*diag(D_onexit) represents the input matrix */ +/* > post-multiplied by a sequence of Jacobi rotations, where the */ +/* > rotation threshold and the total number of sweeps are given in */ +/* > TOL and NSWEEP, respectively. */ +/* > (See the descriptions of A, TOL and NSWEEP.) */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] SVA */ +/* > \verbatim */ +/* > SVA is REAL array, dimension (N) */ +/* > On entry, SVA contains the Euclidean norms of the columns of */ +/* > the matrix A*diag(D). */ +/* > On exit, SVA contains the Euclidean norms of the columns of */ +/* > the matrix A_onexit*diag(D_onexit). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MV */ +/* > \verbatim */ +/* > MV is INTEGER */ +/* > If JOBV = 'A', then MV rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'N', then MV is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] V */ +/* > \verbatim */ +/* > V is COMPLEX array, dimension (LDV,N) */ +/* > If JOBV = 'V' then N rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'A' then MV rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'N', then V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V, LDV >= 1. */ +/* > If JOBV = 'V', LDV >= N. */ +/* > If JOBV = 'A', LDV >= MV. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] EPS */ +/* > \verbatim */ +/* > EPS is REAL */ +/* > EPS = SLAMCH('Epsilon') */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SFMIN */ +/* > \verbatim */ +/* > SFMIN is REAL */ +/* > SFMIN = SLAMCH('Safe Minimum') */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TOL */ +/* > \verbatim */ +/* > TOL is REAL */ +/* > TOL is the threshold for Jacobi rotations. For a pair */ +/* > A(:,p), A(:,q) of pivot columns, the Jacobi rotation is */ +/* > applied only if ABS(COS(angle(A(:,p),A(:,q)))) > TOL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NSWEEP */ +/* > \verbatim */ +/* > NSWEEP is INTEGER */ +/* > NSWEEP is the number of sweeps of Jacobi rotations to be */ +/* > performed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > LWORK is the dimension of WORK. LWORK >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, then the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complexOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > CGSVJ0 is used just to enable CGESVJ to call a simplified version of */ +/* > itself to work on a submatrix of the original matrix. */ +/* > */ +/* > \par Contributor: */ +/* ================== */ +/* > */ +/* > Zlatko Drmac (Zagreb, Croatia) */ +/* > */ +/* > \par Bugs, Examples and Comments: */ +/* ================================= */ +/* > */ +/* > Please report all bugs and send interesting test examples and comments to */ +/* > drmac@math.hr. Thank you. */ + +/* ===================================================================== */ +/* Subroutine */ int cgsvj0_(char *jobv, integer *m, integer *n, complex *a, + integer *lda, complex *d__, real *sva, integer *mv, complex *v, + integer *ldv, real *eps, real *sfmin, real *tol, integer *nsweep, + complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5, + i__6, i__7; + real r__1, r__2; + complex q__1, q__2, q__3; + + /* Local variables */ + real aapp; + complex aapq; + real aaqq; + integer ierr; + real bigtheta; + extern /* Subroutine */ int crot_(integer *, complex *, integer *, + complex *, integer *, real *, complex *); + complex ompq; + integer pskipped; + real aapp0, aapq1, temp1; + integer i__, p, q; + real t; + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + real apoaq, aqoap; + extern logical lsame_(char *, char *); + real theta, small; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *), cswap_(integer *, complex *, integer *, + complex *, integer *); + logical applv, rsvec; + extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *); + logical rotok; + real rootsfmin; + extern real scnrm2_(integer *, complex *, integer *); + real cs, sn; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *), xerbla_(char *, integer *, ftnlen); + integer ijblsk, swband; + extern integer isamax_(integer *, real *, integer *); + integer blskip; + extern /* Subroutine */ int classq_(integer *, complex *, integer *, real + *, real *); + real mxaapq, thsign, mxsinj; + integer ir1, emptsw, notrot, iswrot, jbc; + real big; + integer kbl, lkahead, igl, ibr, jgl, nbl, mvl; + real rootbig, rooteps; + integer rowskip; + real roottol; + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + +/* from BLAS */ +/* from LAPACK */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --sva; + --d__; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + --work; + + /* Function Body */ + applv = lsame_(jobv, "A"); + rsvec = lsame_(jobv, "V"); + if (! (rsvec || applv || lsame_(jobv, "N"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || *n > *m) { + *info = -3; + } else if (*lda < *m) { + *info = -5; + } else if ((rsvec || applv) && *mv < 0) { + *info = -8; + } else if (rsvec && *ldv < *n || applv && *ldv < *mv) { + *info = -10; + } else if (*tol <= *eps) { + *info = -13; + } else if (*nsweep < 0) { + *info = -14; + } else if (*lwork < *m) { + *info = -16; + } else { + *info = 0; + } + +/* #:( */ + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGSVJ0", &i__1, (ftnlen)6); + return 0; + } + + if (rsvec) { + mvl = *n; + } else if (applv) { + mvl = *mv; + } + rsvec = rsvec || applv; + rooteps = sqrt(*eps); + rootsfmin = sqrt(*sfmin); + small = *sfmin / *eps; + big = 1.f / *sfmin; + rootbig = 1.f / rootsfmin; + bigtheta = 1.f / rooteps; + roottol = sqrt(*tol); + + + emptsw = *n * (*n - 1) / 2; + notrot = 0; + + + swband = 0; +/* [TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective */ +/* if CGESVJ is used as a computational routine in the preconditioned */ +/* Jacobi SVD algorithm CGEJSV. For sweeps i=1:SWBAND the procedure */ +/* works on pivots inside a band-like region around the diagonal. */ +/* The boundaries are determined dynamically, based on the number of */ +/* pivots above a threshold. */ + + kbl = f2cmin(8,*n); +/* [TP] KBL is a tuning parameter that defines the tile size in the */ +/* tiling of the p-q loops of pivot pairs. In general, an optimal */ +/* value of KBL depends on the matrix dimensions and on the */ +/* parameters of the computer's memory. */ + + nbl = *n / kbl; + if (nbl * kbl != *n) { + ++nbl; + } + +/* Computing 2nd power */ + i__1 = kbl; + blskip = i__1 * i__1; +/* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */ + + rowskip = f2cmin(5,kbl); +/* [TP] ROWSKIP is a tuning parameter. */ + + lkahead = 1; +/* [TP] LKAHEAD is a tuning parameter. */ + +/* Quasi block transformations, using the lower (upper) triangular */ +/* structure of the input matrix. The quasi-block-cycling usually */ +/* invokes cubic convergence. Big part of this cycle is done inside */ +/* canonical subspaces of dimensions less than M. */ + + + + i__1 = *nsweep; + for (i__ = 1; i__ <= i__1; ++i__) { + + + mxaapq = 0.f; + mxsinj = 0.f; + iswrot = 0; + + notrot = 0; + pskipped = 0; + +/* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs */ +/* 1 <= p < q <= N. This is the first step toward a blocked implementation */ +/* of the rotations. New implementation, based on block transformations, */ +/* is under development. */ + + i__2 = nbl; + for (ibr = 1; ibr <= i__2; ++ibr) { + + igl = (ibr - 1) * kbl + 1; + +/* Computing MIN */ + i__4 = lkahead, i__5 = nbl - ibr; + i__3 = f2cmin(i__4,i__5); + for (ir1 = 0; ir1 <= i__3; ++ir1) { + + igl += ir1 * kbl; + +/* Computing MIN */ + i__5 = igl + kbl - 1, i__6 = *n - 1; + i__4 = f2cmin(i__5,i__6); + for (p = igl; p <= i__4; ++p) { + + + i__5 = *n - p + 1; + q = isamax_(&i__5, &sva[p], &c__1) + p - 1; + if (p != q) { + cswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + + 1], &c__1); + if (rsvec) { + cswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1); + } + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + i__5 = p; + aapq.r = d__[i__5].r, aapq.i = d__[i__5].i; + i__5 = p; + i__6 = q; + d__[i__5].r = d__[i__6].r, d__[i__5].i = d__[i__6].i; + i__5 = q; + d__[i__5].r = aapq.r, d__[i__5].i = aapq.i; + } + + if (ir1 == 0) { + +/* Column norms are periodically updated by explicit */ +/* norm computation. */ +/* Caveat: */ +/* Unfortunately, some BLAS implementations compute SNCRM2(M,A(1,p),1) */ +/* as SQRT(S=CDOTC(M,A(1,p),1,A(1,p),1)), which may cause the result to */ +/* overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and to */ +/* underflow for ||A(:,p)||_2 < SQRT(underflow_threshold). */ +/* Hence, SCNRM2 cannot be trusted, not even in the case when */ +/* the true norm is far from the under(over)flow boundaries. */ +/* If properly implemented SCNRM2 is available, the IF-THEN-ELSE-END IF */ +/* below should be replaced with "AAPP = SCNRM2( M, A(1,p), 1 )". */ + + if (sva[p] < rootbig && sva[p] > rootsfmin) { + sva[p] = scnrm2_(m, &a[p * a_dim1 + 1], &c__1); + } else { + temp1 = 0.f; + aapp = 1.f; + classq_(m, &a[p * a_dim1 + 1], &c__1, &temp1, & + aapp); + sva[p] = temp1 * sqrt(aapp); + } + aapp = sva[p]; + } else { + aapp = sva[p]; + } + + if (aapp > 0.f) { + + pskipped = 0; + +/* Computing MIN */ + i__6 = igl + kbl - 1; + i__5 = f2cmin(i__6,*n); + for (q = p + 1; q <= i__5; ++q) { + + aaqq = sva[q]; + + if (aaqq > 0.f) { + + aapp0 = aapp; + if (aaqq >= 1.f) { + rotok = small * aapp <= aaqq; + if (aapp < big / aaqq) { + cdotc_(&q__3, m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + q__2.r = q__3.r / aaqq, q__2.i = + q__3.i / aaqq; + q__1.r = q__2.r / aapp, q__1.i = + q__2.i / aapp; + aapq.r = q__1.r, aapq.i = q__1.i; + } else { + ccopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[1], &c__1); + clascl_("G", &c__0, &c__0, &aapp, & + c_b27, m, &c__1, &work[1], + lda, &ierr); + cdotc_(&q__2, m, &work[1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + q__1.r = q__2.r / aaqq, q__1.i = + q__2.i / aaqq; + aapq.r = q__1.r, aapq.i = q__1.i; + } + } else { + rotok = aapp <= aaqq / small; + if (aapp > small / aaqq) { + cdotc_(&q__3, m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + q__2.r = q__3.r / aapp, q__2.i = + q__3.i / aapp; + q__1.r = q__2.r / aaqq, q__1.i = + q__2.i / aaqq; + aapq.r = q__1.r, aapq.i = q__1.i; + } else { + ccopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[1], &c__1); + clascl_("G", &c__0, &c__0, &aaqq, & + c_b27, m, &c__1, &work[1], + lda, &ierr); + cdotc_(&q__2, m, &a[p * a_dim1 + 1], & + c__1, &work[1], &c__1); + q__1.r = q__2.r / aapp, q__1.i = + q__2.i / aapp; + aapq.r = q__1.r, aapq.i = q__1.i; + } + } + +/* AAPQ = AAPQ * CONJG( CWORK(p) ) * CWORK(q) */ + aapq1 = -c_abs(&aapq); +/* Computing MAX */ + r__1 = mxaapq, r__2 = -aapq1; + mxaapq = f2cmax(r__1,r__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq1) > *tol) { + r__1 = c_abs(&aapq); + q__1.r = aapq.r / r__1, q__1.i = aapq.i / + r__1; + ompq.r = q__1.r, ompq.i = q__1.i; + +/* [RTD] ROTATED = ROTATED + ONE */ + + if (ir1 == 0) { + notrot = 0; + pskipped = 0; + ++iswrot; + } + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (r__1 = aqoap - apoaq, abs( + r__1)) * -.5f / aapq1; + + if (abs(theta) > bigtheta) { + + t = .5f / theta; + cs = 1.f; + r_cnjg(&q__2, &ompq); + q__1.r = t * q__2.r, q__1.i = t * + q__2.i; + crot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, &cs, &q__1); + if (rsvec) { + r_cnjg(&q__2, &ompq); + q__1.r = t * q__2.r, q__1.i = t * q__2.i; + crot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, &cs, &q__1); + } +/* Computing MAX */ + r__1 = 0.f, r__2 = t * apoaq * + aapq1 + 1.f; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - t * + aqoap * aapq1; + aapp *= sqrt((f2cmax(r__1,r__2))); +/* Computing MAX */ + r__1 = mxsinj, r__2 = abs(t); + mxsinj = f2cmax(r__1,r__2); + + } else { + + + thsign = -r_sign(&c_b27, &aapq1); + t = 1.f / (theta + thsign * sqrt( + theta * theta + 1.f)); + cs = sqrt(1.f / (t * t + 1.f)); + sn = t * cs; + +/* Computing MAX */ + r__1 = mxsinj, r__2 = abs(sn); + mxsinj = f2cmax(r__1,r__2); +/* Computing MAX */ + r__1 = 0.f, r__2 = t * apoaq * + aapq1 + 1.f; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - t * + aqoap * aapq1; + aapp *= sqrt((f2cmax(r__1,r__2))); + + r_cnjg(&q__2, &ompq); + q__1.r = sn * q__2.r, q__1.i = sn + * q__2.i; + crot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, &cs, &q__1); + if (rsvec) { + r_cnjg(&q__2, &ompq); + q__1.r = sn * q__2.r, q__1.i = sn * q__2.i; + crot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, &cs, &q__1); + } + } + i__6 = p; + i__7 = q; + q__2.r = -d__[i__7].r, q__2.i = -d__[ + i__7].i; + q__1.r = q__2.r * ompq.r - q__2.i * + ompq.i, q__1.i = q__2.r * + ompq.i + q__2.i * ompq.r; + d__[i__6].r = q__1.r, d__[i__6].i = + q__1.i; + + } else { + ccopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[1], &c__1); + clascl_("G", &c__0, &c__0, &aapp, & + c_b27, m, &c__1, &work[1], + lda, &ierr); + clascl_("G", &c__0, &c__0, &aaqq, & + c_b27, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + q__1.r = -aapq.r, q__1.i = -aapq.i; + caxpy_(m, &q__1, &work[1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + clascl_("G", &c__0, &c__0, &c_b27, & + aaqq, m, &c__1, &a[q * a_dim1 + + 1], lda, &ierr); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - aapq1 * + aapq1; + sva[q] = aaqq * sqrt((f2cmax(r__1,r__2))) + ; + mxsinj = f2cmax(mxsinj,*sfmin); + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q), SVA(p) */ +/* recompute SVA(q), SVA(p). */ + +/* Computing 2nd power */ + r__1 = sva[q] / aaqq; + if (r__1 * r__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = scnrm2_(m, &a[q * a_dim1 + + 1], &c__1); + } else { + t = 0.f; + aaqq = 1.f; + classq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq); + } + } + if (aapp / aapp0 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = scnrm2_(m, &a[p * a_dim1 + + 1], &c__1); + } else { + t = 0.f; + aapp = 1.f; + classq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp); + } + sva[p] = aapp; + } + + } else { +/* A(:,p) and A(:,q) already numerically orthogonal */ + if (ir1 == 0) { + ++notrot; + } +/* [RTD] SKIPPED = SKIPPED + 1 */ + ++pskipped; + } + } else { +/* A(:,q) is zero column */ + if (ir1 == 0) { + ++notrot; + } + ++pskipped; + } + + if (i__ <= swband && pskipped > rowskip) { + if (ir1 == 0) { + aapp = -aapp; + } + notrot = 0; + goto L2103; + } + +/* L2002: */ + } +/* END q-LOOP */ + +L2103: +/* bailed out of q-loop */ + + sva[p] = aapp; + + } else { + sva[p] = aapp; + if (ir1 == 0 && aapp == 0.f) { +/* Computing MIN */ + i__5 = igl + kbl - 1; + notrot = notrot + f2cmin(i__5,*n) - p; + } + } + +/* L2001: */ + } +/* end of the p-loop */ +/* end of doing the block ( ibr, ibr ) */ +/* L1002: */ + } +/* end of ir1-loop */ + +/* ... go to the off diagonal blocks */ + + igl = (ibr - 1) * kbl + 1; + + i__3 = nbl; + for (jbc = ibr + 1; jbc <= i__3; ++jbc) { + + jgl = (jbc - 1) * kbl + 1; + +/* doing the block at ( ibr, jbc ) */ + + ijblsk = 0; +/* Computing MIN */ + i__5 = igl + kbl - 1; + i__4 = f2cmin(i__5,*n); + for (p = igl; p <= i__4; ++p) { + + aapp = sva[p]; + if (aapp > 0.f) { + + pskipped = 0; + +/* Computing MIN */ + i__6 = jgl + kbl - 1; + i__5 = f2cmin(i__6,*n); + for (q = jgl; q <= i__5; ++q) { + + aaqq = sva[q]; + if (aaqq > 0.f) { + aapp0 = aapp; + + +/* Safe Gram matrix computation */ + + if (aaqq >= 1.f) { + if (aapp >= aaqq) { + rotok = small * aapp <= aaqq; + } else { + rotok = small * aaqq <= aapp; + } + if (aapp < big / aaqq) { + cdotc_(&q__3, m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + q__2.r = q__3.r / aaqq, q__2.i = + q__3.i / aaqq; + q__1.r = q__2.r / aapp, q__1.i = + q__2.i / aapp; + aapq.r = q__1.r, aapq.i = q__1.i; + } else { + ccopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[1], &c__1); + clascl_("G", &c__0, &c__0, &aapp, & + c_b27, m, &c__1, &work[1], + lda, &ierr); + cdotc_(&q__2, m, &work[1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + q__1.r = q__2.r / aaqq, q__1.i = + q__2.i / aaqq; + aapq.r = q__1.r, aapq.i = q__1.i; + } + } else { + if (aapp >= aaqq) { + rotok = aapp <= aaqq / small; + } else { + rotok = aaqq <= aapp / small; + } + if (aapp > small / aaqq) { + cdotc_(&q__3, m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + r__1 = f2cmax(aaqq,aapp); + q__2.r = q__3.r / r__1, q__2.i = + q__3.i / r__1; + r__2 = f2cmin(aaqq,aapp); + q__1.r = q__2.r / r__2, q__1.i = + q__2.i / r__2; + aapq.r = q__1.r, aapq.i = q__1.i; + } else { + ccopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[1], &c__1); + clascl_("G", &c__0, &c__0, &aaqq, & + c_b27, m, &c__1, &work[1], + lda, &ierr); + cdotc_(&q__2, m, &a[p * a_dim1 + 1], & + c__1, &work[1], &c__1); + q__1.r = q__2.r / aapp, q__1.i = + q__2.i / aapp; + aapq.r = q__1.r, aapq.i = q__1.i; + } + } + +/* AAPQ = AAPQ * CONJG(CWORK(p))*CWORK(q) */ + aapq1 = -c_abs(&aapq); +/* Computing MAX */ + r__1 = mxaapq, r__2 = -aapq1; + mxaapq = f2cmax(r__1,r__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq1) > *tol) { + r__1 = c_abs(&aapq); + q__1.r = aapq.r / r__1, q__1.i = aapq.i / + r__1; + ompq.r = q__1.r, ompq.i = q__1.i; + notrot = 0; +/* [RTD] ROTATED = ROTATED + 1 */ + pskipped = 0; + ++iswrot; + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (r__1 = aqoap - apoaq, abs( + r__1)) * -.5f / aapq1; + if (aaqq > aapp0) { + theta = -theta; + } + + if (abs(theta) > bigtheta) { + t = .5f / theta; + cs = 1.f; + r_cnjg(&q__2, &ompq); + q__1.r = t * q__2.r, q__1.i = t * + q__2.i; + crot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, &cs, &q__1); + if (rsvec) { + r_cnjg(&q__2, &ompq); + q__1.r = t * q__2.r, q__1.i = t * q__2.i; + crot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, &cs, &q__1); + } +/* Computing MAX */ + r__1 = 0.f, r__2 = t * apoaq * + aapq1 + 1.f; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - t * + aqoap * aapq1; + aapp *= sqrt((f2cmax(r__1,r__2))); +/* Computing MAX */ + r__1 = mxsinj, r__2 = abs(t); + mxsinj = f2cmax(r__1,r__2); + } else { + + + thsign = -r_sign(&c_b27, &aapq1); + if (aaqq > aapp0) { + thsign = -thsign; + } + t = 1.f / (theta + thsign * sqrt( + theta * theta + 1.f)); + cs = sqrt(1.f / (t * t + 1.f)); + sn = t * cs; +/* Computing MAX */ + r__1 = mxsinj, r__2 = abs(sn); + mxsinj = f2cmax(r__1,r__2); +/* Computing MAX */ + r__1 = 0.f, r__2 = t * apoaq * + aapq1 + 1.f; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - t * + aqoap * aapq1; + aapp *= sqrt((f2cmax(r__1,r__2))); + + r_cnjg(&q__2, &ompq); + q__1.r = sn * q__2.r, q__1.i = sn + * q__2.i; + crot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, &cs, &q__1); + if (rsvec) { + r_cnjg(&q__2, &ompq); + q__1.r = sn * q__2.r, q__1.i = sn * q__2.i; + crot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, &cs, &q__1); + } + } + i__6 = p; + i__7 = q; + q__2.r = -d__[i__7].r, q__2.i = -d__[ + i__7].i; + q__1.r = q__2.r * ompq.r - q__2.i * + ompq.i, q__1.i = q__2.r * + ompq.i + q__2.i * ompq.r; + d__[i__6].r = q__1.r, d__[i__6].i = + q__1.i; + + } else { + if (aapp > aaqq) { + ccopy_(m, &a[p * a_dim1 + 1], & + c__1, &work[1], &c__1); + clascl_("G", &c__0, &c__0, &aapp, + &c_b27, m, &c__1, &work[1] + , lda, &ierr); + clascl_("G", &c__0, &c__0, &aaqq, + &c_b27, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + q__1.r = -aapq.r, q__1.i = + -aapq.i; + caxpy_(m, &q__1, &work[1], &c__1, + &a[q * a_dim1 + 1], &c__1) + ; + clascl_("G", &c__0, &c__0, &c_b27, + &aaqq, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - aapq1 * + aapq1; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); + mxsinj = f2cmax(mxsinj,*sfmin); + } else { + ccopy_(m, &a[q * a_dim1 + 1], & + c__1, &work[1], &c__1); + clascl_("G", &c__0, &c__0, &aaqq, + &c_b27, m, &c__1, &work[1] + , lda, &ierr); + clascl_("G", &c__0, &c__0, &aapp, + &c_b27, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); + r_cnjg(&q__2, &aapq); + q__1.r = -q__2.r, q__1.i = + -q__2.i; + caxpy_(m, &q__1, &work[1], &c__1, + &a[p * a_dim1 + 1], &c__1) + ; + clascl_("G", &c__0, &c__0, &c_b27, + &aapp, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - aapq1 * + aapq1; + sva[p] = aapp * sqrt((f2cmax(r__1, + r__2))); + mxsinj = f2cmax(mxsinj,*sfmin); + } + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q), SVA(p) */ +/* Computing 2nd power */ + r__1 = sva[q] / aaqq; + if (r__1 * r__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = scnrm2_(m, &a[q * a_dim1 + + 1], &c__1); + } else { + t = 0.f; + aaqq = 1.f; + classq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq); + } + } +/* Computing 2nd power */ + r__1 = aapp / aapp0; + if (r__1 * r__1 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = scnrm2_(m, &a[p * a_dim1 + + 1], &c__1); + } else { + t = 0.f; + aapp = 1.f; + classq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp); + } + sva[p] = aapp; + } +/* end of OK rotation */ + } else { + ++notrot; +/* [RTD] SKIPPED = SKIPPED + 1 */ + ++pskipped; + ++ijblsk; + } + } else { + ++notrot; + ++pskipped; + ++ijblsk; + } + + if (i__ <= swband && ijblsk >= blskip) { + sva[p] = aapp; + notrot = 0; + goto L2011; + } + if (i__ <= swband && pskipped > rowskip) { + aapp = -aapp; + notrot = 0; + goto L2203; + } + +/* L2200: */ + } +/* end of the q-loop */ +L2203: + + sva[p] = aapp; + + } else { + + if (aapp == 0.f) { +/* Computing MIN */ + i__5 = jgl + kbl - 1; + notrot = notrot + f2cmin(i__5,*n) - jgl + 1; + } + if (aapp < 0.f) { + notrot = 0; + } + + } + +/* L2100: */ + } +/* end of the p-loop */ +/* L2010: */ + } +/* end of the jbc-loop */ +L2011: +/* 2011 bailed out of the jbc-loop */ +/* Computing MIN */ + i__4 = igl + kbl - 1; + i__3 = f2cmin(i__4,*n); + for (p = igl; p <= i__3; ++p) { + sva[p] = (r__1 = sva[p], abs(r__1)); +/* L2012: */ + } +/* ** */ +/* L2000: */ + } +/* 2000 :: end of the ibr-loop */ + + if (sva[*n] < rootbig && sva[*n] > rootsfmin) { + sva[*n] = scnrm2_(m, &a[*n * a_dim1 + 1], &c__1); + } else { + t = 0.f; + aapp = 1.f; + classq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp); + sva[*n] = t * sqrt(aapp); + } + +/* Additional steering devices */ + + if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) { + swband = i__; + } + + if (i__ > swband + 1 && mxaapq < sqrt((real) (*n)) * *tol && (real) (* + n) * mxaapq * mxsinj < *tol) { + goto L1994; + } + + if (notrot >= emptsw) { + goto L1994; + } + +/* L1993: */ + } +/* end i=1:NSWEEP loop */ + +/* #:( Reaching this point means that the procedure has not converged. */ + *info = *nsweep - 1; + goto L1995; + +L1994: +/* #:) Reaching this point means numerical convergence after the i-th */ +/* sweep. */ + + *info = 0; +/* #:) INFO = 0 confirms successful iterations. */ +L1995: + +/* Sort the vector SVA() of column norms. */ + i__1 = *n - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + q = isamax_(&i__2, &sva[p], &c__1) + p - 1; + if (p != q) { + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + i__2 = p; + aapq.r = d__[i__2].r, aapq.i = d__[i__2].i; + i__2 = p; + i__3 = q; + d__[i__2].r = d__[i__3].r, d__[i__2].i = d__[i__3].i; + i__2 = q; + d__[i__2].r = aapq.r, d__[i__2].i = aapq.i; + cswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); + if (rsvec) { + cswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } +/* L5991: */ + } + + return 0; +} /* cgsvj0_ */ + diff --git a/lapack-netlib/SRC/cgsvj1.c b/lapack-netlib/SRC/cgsvj1.c new file mode 100644 index 000000000..337b801b0 --- /dev/null +++ b/lapack-netlib/SRC/cgsvj1.c @@ -0,0 +1,1226 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CGSVJ1 pre-processor for the routine cgesvj, applies Jacobi rotations targeting only particular + pivots. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGSVJ1 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, */ +/* EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) */ + +/* REAL EPS, SFMIN, TOL */ +/* INTEGER INFO, LDA, LDV, LWORK, M, MV, N, N1, NSWEEP */ +/* CHARACTER*1 JOBV */ +/* COMPLEX A( LDA, * ), D( N ), V( LDV, * ), WORK( LWORK ) */ +/* REAL SVA( N ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGSVJ1 is called from CGESVJ as a pre-processor and that is its main */ +/* > purpose. It applies Jacobi rotations in the same way as CGESVJ does, but */ +/* > it targets only particular pivots and it does not check convergence */ +/* > (stopping criterion). Few tunning parameters (marked by [TP]) are */ +/* > available for the implementer. */ +/* > */ +/* > Further Details */ +/* > ~~~~~~~~~~~~~~~ */ +/* > CGSVJ1 applies few sweeps of Jacobi rotations in the column space of */ +/* > the input M-by-N matrix A. The pivot pairs are taken from the (1,2) */ +/* > off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The */ +/* > block-entries (tiles) of the (1,2) off-diagonal block are marked by the */ +/* > [x]'s in the following scheme: */ +/* > */ +/* > | * * * [x] [x] [x]| */ +/* > | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. */ +/* > | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. */ +/* > |[x] [x] [x] * * * | */ +/* > |[x] [x] [x] * * * | */ +/* > |[x] [x] [x] * * * | */ +/* > */ +/* > In terms of the columns of A, the first N1 columns are rotated 'against' */ +/* > the remaining N-N1 columns, trying to increase the angle between the */ +/* > corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is */ +/* > tiled using quadratic tiles of side KBL. Here, KBL is a tunning parameter. */ +/* > The number of sweeps is given in NSWEEP and the orthogonality threshold */ +/* > is given in TOL. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > Specifies whether the output from this procedure is used */ +/* > to compute the matrix V: */ +/* > = 'V': the product of the Jacobi rotations is accumulated */ +/* > by postmulyiplying the N-by-N array V. */ +/* > (See the description of V.) */ +/* > = 'A': the product of the Jacobi rotations is accumulated */ +/* > by postmulyiplying the MV-by-N array V. */ +/* > (See the descriptions of MV and V.) */ +/* > = 'N': the Jacobi rotations are not accumulated. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. */ +/* > M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N1 */ +/* > \verbatim */ +/* > N1 is INTEGER */ +/* > N1 specifies the 2 x 2 block partition, the first N1 columns are */ +/* > rotated 'against' the remaining N-N1 columns of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, M-by-N matrix A, such that A*diag(D) represents */ +/* > the input matrix. */ +/* > On exit, */ +/* > A_onexit * D_onexit represents the input matrix A*diag(D) */ +/* > post-multiplied by a sequence of Jacobi rotations, where the */ +/* > rotation threshold and the total number of sweeps are given in */ +/* > TOL and NSWEEP, respectively. */ +/* > (See the descriptions of N1, D, TOL and NSWEEP.) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is COMPLEX array, dimension (N) */ +/* > The array D accumulates the scaling factors from the fast scaled */ +/* > Jacobi rotations. */ +/* > On entry, A*diag(D) represents the input matrix. */ +/* > On exit, A_onexit*diag(D_onexit) represents the input matrix */ +/* > post-multiplied by a sequence of Jacobi rotations, where the */ +/* > rotation threshold and the total number of sweeps are given in */ +/* > TOL and NSWEEP, respectively. */ +/* > (See the descriptions of N1, A, TOL and NSWEEP.) */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] SVA */ +/* > \verbatim */ +/* > SVA is REAL array, dimension (N) */ +/* > On entry, SVA contains the Euclidean norms of the columns of */ +/* > the matrix A*diag(D). */ +/* > On exit, SVA contains the Euclidean norms of the columns of */ +/* > the matrix onexit*diag(D_onexit). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MV */ +/* > \verbatim */ +/* > MV is INTEGER */ +/* > If JOBV = 'A', then MV rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'N', then MV is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] V */ +/* > \verbatim */ +/* > V is COMPLEX array, dimension (LDV,N) */ +/* > If JOBV = 'V' then N rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'A' then MV rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'N', then V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V, LDV >= 1. */ +/* > If JOBV = 'V', LDV >= N. */ +/* > If JOBV = 'A', LDV >= MV. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] EPS */ +/* > \verbatim */ +/* > EPS is REAL */ +/* > EPS = SLAMCH('Epsilon') */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SFMIN */ +/* > \verbatim */ +/* > SFMIN is REAL */ +/* > SFMIN = SLAMCH('Safe Minimum') */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TOL */ +/* > \verbatim */ +/* > TOL is REAL */ +/* > TOL is the threshold for Jacobi rotations. For a pair */ +/* > A(:,p), A(:,q) of pivot columns, the Jacobi rotation is */ +/* > applied only if ABS(COS(angle(A(:,p),A(:,q)))) > TOL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NSWEEP */ +/* > \verbatim */ +/* > NSWEEP is INTEGER */ +/* > NSWEEP is the number of sweeps of Jacobi rotations to be */ +/* > performed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > LWORK is the dimension of WORK. LWORK >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, then the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complexOTHERcomputational */ + +/* > \par Contributor: */ +/* ================== */ +/* > */ +/* > Zlatko Drmac (Zagreb, Croatia) */ + +/* ===================================================================== */ +/* Subroutine */ int cgsvj1_(char *jobv, integer *m, integer *n, integer *n1, + complex *a, integer *lda, complex *d__, real *sva, integer *mv, + complex *v, integer *ldv, real *eps, real *sfmin, real *tol, integer * + nsweep, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5, + i__6, i__7; + real r__1, r__2; + complex q__1, q__2, q__3; + + /* Local variables */ + integer nblc; + real aapp; + complex aapq; + real aaqq; + integer nblr, ierr; + real bigtheta; + extern /* Subroutine */ int crot_(integer *, complex *, integer *, + complex *, integer *, real *, complex *); + complex ompq; + integer pskipped; + real aapp0, aapq1, temp1; + integer i__, p, q; + real t; + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + real apoaq, aqoap; + extern logical lsame_(char *, char *); + real theta, small; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *), cswap_(integer *, complex *, integer *, + complex *, integer *); + logical applv, rsvec; + extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *); + logical rotok; + real rootsfmin; + extern real scnrm2_(integer *, complex *, integer *); + real cs, sn; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *), xerbla_(char *, integer *, ftnlen); + integer ijblsk, swband; + extern integer isamax_(integer *, real *, integer *); + integer blskip; + extern /* Subroutine */ int classq_(integer *, complex *, integer *, real + *, real *); + real mxaapq, thsign, mxsinj; + integer emptsw, notrot, iswrot, jbc; + real big; + integer kbl, igl, ibr, jgl, mvl; + real rootbig, rooteps; + integer rowskip; + real roottol; + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --sva; + --d__; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + --work; + + /* Function Body */ + applv = lsame_(jobv, "A"); + rsvec = lsame_(jobv, "V"); + if (! (rsvec || applv || lsame_(jobv, "N"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || *n > *m) { + *info = -3; + } else if (*n1 < 0) { + *info = -4; + } else if (*lda < *m) { + *info = -6; + } else if ((rsvec || applv) && *mv < 0) { + *info = -9; + } else if (rsvec && *ldv < *n || applv && *ldv < *mv) { + *info = -11; + } else if (*tol <= *eps) { + *info = -14; + } else if (*nsweep < 0) { + *info = -15; + } else if (*lwork < *m) { + *info = -17; + } else { + *info = 0; + } + +/* #:( */ + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGSVJ1", &i__1, (ftnlen)6); + return 0; + } + + if (rsvec) { + mvl = *n; + } else if (applv) { + mvl = *mv; + } + rsvec = rsvec || applv; + rooteps = sqrt(*eps); + rootsfmin = sqrt(*sfmin); + small = *sfmin / *eps; + big = 1.f / *sfmin; + rootbig = 1.f / rootsfmin; +/* LARGE = BIG / SQRT( REAL( M*N ) ) */ + bigtheta = 1.f / rooteps; + roottol = sqrt(*tol); + + +/* RSVEC = LSAME( JOBV, 'Y' ) */ + + emptsw = *n1 * (*n - *n1); + notrot = 0; + + + kbl = f2cmin(8,*n); + nblr = *n1 / kbl; + if (nblr * kbl != *n1) { + ++nblr; + } + nblc = (*n - *n1) / kbl; + if (nblc * kbl != *n - *n1) { + ++nblc; + } +/* Computing 2nd power */ + i__1 = kbl; + blskip = i__1 * i__1 + 1; +/* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */ + rowskip = f2cmin(5,kbl); +/* [TP] ROWSKIP is a tuning parameter. */ + swband = 0; +/* [TP] SWBAND is a tuning parameter. It is meaningful and effective */ +/* if CGESVJ is used as a computational routine in the preconditioned */ +/* Jacobi SVD algorithm CGEJSV. */ + + +/* | * * * [x] [x] [x]| */ +/* | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. */ +/* | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. */ +/* |[x] [x] [x] * * * | */ +/* |[x] [x] [x] * * * | */ +/* |[x] [x] [x] * * * | */ + + + i__1 = *nsweep; + for (i__ = 1; i__ <= i__1; ++i__) { + + + mxaapq = 0.f; + mxsinj = 0.f; + iswrot = 0; + + notrot = 0; + pskipped = 0; + +/* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs */ +/* 1 <= p < q <= N. This is the first step toward a blocked implementation */ +/* of the rotations. New implementation, based on block transformations, */ +/* is under development. */ + + i__2 = nblr; + for (ibr = 1; ibr <= i__2; ++ibr) { + + igl = (ibr - 1) * kbl + 1; + + +/* ... go to the off diagonal blocks */ + + igl = (ibr - 1) * kbl + 1; + +/* DO 2010 jbc = ibr + 1, NBL */ + i__3 = nblc; + for (jbc = 1; jbc <= i__3; ++jbc) { + + jgl = (jbc - 1) * kbl + *n1 + 1; + +/* doing the block at ( ibr, jbc ) */ + + ijblsk = 0; +/* Computing MIN */ + i__5 = igl + kbl - 1; + i__4 = f2cmin(i__5,*n1); + for (p = igl; p <= i__4; ++p) { + + aapp = sva[p]; + if (aapp > 0.f) { + + pskipped = 0; + +/* Computing MIN */ + i__6 = jgl + kbl - 1; + i__5 = f2cmin(i__6,*n); + for (q = jgl; q <= i__5; ++q) { + + aaqq = sva[q]; + if (aaqq > 0.f) { + aapp0 = aapp; + + +/* Safe Gram matrix computation */ + + if (aaqq >= 1.f) { + if (aapp >= aaqq) { + rotok = small * aapp <= aaqq; + } else { + rotok = small * aaqq <= aapp; + } + if (aapp < big / aaqq) { + cdotc_(&q__3, m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + q__2.r = q__3.r / aaqq, q__2.i = + q__3.i / aaqq; + q__1.r = q__2.r / aapp, q__1.i = + q__2.i / aapp; + aapq.r = q__1.r, aapq.i = q__1.i; + } else { + ccopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[1], &c__1); + clascl_("G", &c__0, &c__0, &aapp, & + c_b18, m, &c__1, &work[1], + lda, &ierr); + cdotc_(&q__2, m, &work[1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + q__1.r = q__2.r / aaqq, q__1.i = + q__2.i / aaqq; + aapq.r = q__1.r, aapq.i = q__1.i; + } + } else { + if (aapp >= aaqq) { + rotok = aapp <= aaqq / small; + } else { + rotok = aaqq <= aapp / small; + } + if (aapp > small / aaqq) { + cdotc_(&q__3, m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + r__1 = f2cmax(aaqq,aapp); + q__2.r = q__3.r / r__1, q__2.i = + q__3.i / r__1; + r__2 = f2cmin(aaqq,aapp); + q__1.r = q__2.r / r__2, q__1.i = + q__2.i / r__2; + aapq.r = q__1.r, aapq.i = q__1.i; + } else { + ccopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[1], &c__1); + clascl_("G", &c__0, &c__0, &aaqq, & + c_b18, m, &c__1, &work[1], + lda, &ierr); + cdotc_(&q__2, m, &a[p * a_dim1 + 1], & + c__1, &work[1], &c__1); + q__1.r = q__2.r / aapp, q__1.i = + q__2.i / aapp; + aapq.r = q__1.r, aapq.i = q__1.i; + } + } + +/* AAPQ = AAPQ * CONJG(CWORK(p))*CWORK(q) */ + aapq1 = -c_abs(&aapq); +/* Computing MAX */ + r__1 = mxaapq, r__2 = -aapq1; + mxaapq = f2cmax(r__1,r__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq1) > *tol) { + r__1 = c_abs(&aapq); + q__1.r = aapq.r / r__1, q__1.i = aapq.i / + r__1; + ompq.r = q__1.r, ompq.i = q__1.i; + notrot = 0; +/* [RTD] ROTATED = ROTATED + 1 */ + pskipped = 0; + ++iswrot; + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (r__1 = aqoap - apoaq, abs( + r__1)) * -.5f / aapq1; + if (aaqq > aapp0) { + theta = -theta; + } + + if (abs(theta) > bigtheta) { + t = .5f / theta; + cs = 1.f; + r_cnjg(&q__2, &ompq); + q__1.r = t * q__2.r, q__1.i = t * + q__2.i; + crot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, &cs, &q__1); + if (rsvec) { + r_cnjg(&q__2, &ompq); + q__1.r = t * q__2.r, q__1.i = t * q__2.i; + crot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, &cs, &q__1); + } +/* Computing MAX */ + r__1 = 0.f, r__2 = t * apoaq * + aapq1 + 1.f; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - t * + aqoap * aapq1; + aapp *= sqrt((f2cmax(r__1,r__2))); +/* Computing MAX */ + r__1 = mxsinj, r__2 = abs(t); + mxsinj = f2cmax(r__1,r__2); + } else { + + + thsign = -r_sign(&c_b18, &aapq1); + if (aaqq > aapp0) { + thsign = -thsign; + } + t = 1.f / (theta + thsign * sqrt( + theta * theta + 1.f)); + cs = sqrt(1.f / (t * t + 1.f)); + sn = t * cs; +/* Computing MAX */ + r__1 = mxsinj, r__2 = abs(sn); + mxsinj = f2cmax(r__1,r__2); +/* Computing MAX */ + r__1 = 0.f, r__2 = t * apoaq * + aapq1 + 1.f; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - t * + aqoap * aapq1; + aapp *= sqrt((f2cmax(r__1,r__2))); + + r_cnjg(&q__2, &ompq); + q__1.r = sn * q__2.r, q__1.i = sn + * q__2.i; + crot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, &cs, &q__1); + if (rsvec) { + r_cnjg(&q__2, &ompq); + q__1.r = sn * q__2.r, q__1.i = sn * q__2.i; + crot_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, &cs, &q__1); + } + } + i__6 = p; + i__7 = q; + q__2.r = -d__[i__7].r, q__2.i = -d__[ + i__7].i; + q__1.r = q__2.r * ompq.r - q__2.i * + ompq.i, q__1.i = q__2.r * + ompq.i + q__2.i * ompq.r; + d__[i__6].r = q__1.r, d__[i__6].i = + q__1.i; + + } else { + if (aapp > aaqq) { + ccopy_(m, &a[p * a_dim1 + 1], & + c__1, &work[1], &c__1); + clascl_("G", &c__0, &c__0, &aapp, + &c_b18, m, &c__1, &work[1] + , lda, &ierr); + clascl_("G", &c__0, &c__0, &aaqq, + &c_b18, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + q__1.r = -aapq.r, q__1.i = + -aapq.i; + caxpy_(m, &q__1, &work[1], &c__1, + &a[q * a_dim1 + 1], &c__1) + ; + clascl_("G", &c__0, &c__0, &c_b18, + &aaqq, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - aapq1 * + aapq1; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); + mxsinj = f2cmax(mxsinj,*sfmin); + } else { + ccopy_(m, &a[q * a_dim1 + 1], & + c__1, &work[1], &c__1); + clascl_("G", &c__0, &c__0, &aaqq, + &c_b18, m, &c__1, &work[1] + , lda, &ierr); + clascl_("G", &c__0, &c__0, &aapp, + &c_b18, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); + r_cnjg(&q__2, &aapq); + q__1.r = -q__2.r, q__1.i = + -q__2.i; + caxpy_(m, &q__1, &work[1], &c__1, + &a[p * a_dim1 + 1], &c__1) + ; + clascl_("G", &c__0, &c__0, &c_b18, + &aapp, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - aapq1 * + aapq1; + sva[p] = aapp * sqrt((f2cmax(r__1, + r__2))); + mxsinj = f2cmax(mxsinj,*sfmin); + } + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q), SVA(p) */ +/* Computing 2nd power */ + r__1 = sva[q] / aaqq; + if (r__1 * r__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = scnrm2_(m, &a[q * a_dim1 + + 1], &c__1); + } else { + t = 0.f; + aaqq = 1.f; + classq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq); + } + } +/* Computing 2nd power */ + r__1 = aapp / aapp0; + if (r__1 * r__1 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = scnrm2_(m, &a[p * a_dim1 + + 1], &c__1); + } else { + t = 0.f; + aapp = 1.f; + classq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp); + } + sva[p] = aapp; + } +/* end of OK rotation */ + } else { + ++notrot; +/* [RTD] SKIPPED = SKIPPED + 1 */ + ++pskipped; + ++ijblsk; + } + } else { + ++notrot; + ++pskipped; + ++ijblsk; + } + + if (i__ <= swband && ijblsk >= blskip) { + sva[p] = aapp; + notrot = 0; + goto L2011; + } + if (i__ <= swband && pskipped > rowskip) { + aapp = -aapp; + notrot = 0; + goto L2203; + } + +/* L2200: */ + } +/* end of the q-loop */ +L2203: + + sva[p] = aapp; + + } else { + + if (aapp == 0.f) { +/* Computing MIN */ + i__5 = jgl + kbl - 1; + notrot = notrot + f2cmin(i__5,*n) - jgl + 1; + } + if (aapp < 0.f) { + notrot = 0; + } + + } + +/* L2100: */ + } +/* end of the p-loop */ +/* L2010: */ + } +/* end of the jbc-loop */ +L2011: +/* 2011 bailed out of the jbc-loop */ +/* Computing MIN */ + i__4 = igl + kbl - 1; + i__3 = f2cmin(i__4,*n); + for (p = igl; p <= i__3; ++p) { + sva[p] = (r__1 = sva[p], abs(r__1)); +/* L2012: */ + } +/* ** */ +/* L2000: */ + } +/* 2000 :: end of the ibr-loop */ + + if (sva[*n] < rootbig && sva[*n] > rootsfmin) { + sva[*n] = scnrm2_(m, &a[*n * a_dim1 + 1], &c__1); + } else { + t = 0.f; + aapp = 1.f; + classq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp); + sva[*n] = t * sqrt(aapp); + } + +/* Additional steering devices */ + + if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) { + swband = i__; + } + + if (i__ > swband + 1 && mxaapq < sqrt((real) (*n)) * *tol && (real) (* + n) * mxaapq * mxsinj < *tol) { + goto L1994; + } + + if (notrot >= emptsw) { + goto L1994; + } + +/* L1993: */ + } +/* end i=1:NSWEEP loop */ + +/* #:( Reaching this point means that the procedure has not converged. */ + *info = *nsweep - 1; + goto L1995; + +L1994: +/* #:) Reaching this point means numerical convergence after the i-th */ +/* sweep. */ + + *info = 0; +/* #:) INFO = 0 confirms successful iterations. */ +L1995: + +/* Sort the vector SVA() of column norms. */ + i__1 = *n - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + q = isamax_(&i__2, &sva[p], &c__1) + p - 1; + if (p != q) { + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + i__2 = p; + aapq.r = d__[i__2].r, aapq.i = d__[i__2].i; + i__2 = p; + i__3 = q; + d__[i__2].r = d__[i__3].r, d__[i__2].i = d__[i__3].i; + i__2 = q; + d__[i__2].r = aapq.r, d__[i__2].i = aapq.i; + cswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); + if (rsvec) { + cswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } +/* L5991: */ + } + + + return 0; +} /* cgsvj1_ */ + diff --git a/lapack-netlib/SRC/cgtcon.c b/lapack-netlib/SRC/cgtcon.c new file mode 100644 index 000000000..ffc55df3c --- /dev/null +++ b/lapack-netlib/SRC/cgtcon.c @@ -0,0 +1,645 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CGTCON */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGTCON + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, */ +/* WORK, INFO ) */ + +/* CHARACTER NORM */ +/* INTEGER INFO, N */ +/* REAL ANORM, RCOND */ +/* INTEGER IPIV( * ) */ +/* COMPLEX D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGTCON estimates the reciprocal of the condition number of a complex */ +/* > tridiagonal matrix A using the LU factorization as computed by */ +/* > CGTTRF. */ +/* > */ +/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] NORM */ +/* > \verbatim */ +/* > NORM is CHARACTER*1 */ +/* > Specifies whether the 1-norm condition number or the */ +/* > infinity-norm condition number is required: */ +/* > = '1' or 'O': 1-norm; */ +/* > = 'I': Infinity-norm. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DL */ +/* > \verbatim */ +/* > DL is COMPLEX array, dimension (N-1) */ +/* > The (n-1) multipliers that define the matrix L from the */ +/* > LU factorization of A as computed by CGTTRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is COMPLEX array, dimension (N) */ +/* > The n diagonal elements of the upper triangular matrix U from */ +/* > the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU */ +/* > \verbatim */ +/* > DU is COMPLEX array, dimension (N-1) */ +/* > The (n-1) elements of the first superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU2 */ +/* > \verbatim */ +/* > DU2 is COMPLEX array, dimension (N-2) */ +/* > The (n-2) elements of the second superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* > interchanged with row IPIV(i). IPIV(i) will always be either */ +/* > i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* > required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ANORM */ +/* > \verbatim */ +/* > ANORM is REAL */ +/* > If NORM = '1' or 'O', the 1-norm of the original matrix A. */ +/* > If NORM = 'I', the infinity-norm of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > The reciprocal of the condition number of the matrix A, */ +/* > computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ +/* > estimate of the 1-norm of inv(A) computed in this routine. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexGTcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int cgtcon_(char *norm, integer *n, complex *dl, complex * + d__, complex *du, complex *du2, integer *ipiv, real *anorm, real * + rcond, complex *work, integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer kase, kase1, i__; + extern logical lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int clacn2_(integer *, complex *, complex *, real + *, integer *, integer *), xerbla_(char *, integer *, ftnlen); + real ainvnm; + logical onenrm; + extern /* Subroutine */ int cgttrs_(char *, integer *, integer *, complex + *, complex *, complex *, complex *, integer *, complex *, integer + *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + --work; + --ipiv; + --du2; + --du; + --d__; + --dl; + + /* Function Body */ + *info = 0; + onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O"); + if (! onenrm && ! lsame_(norm, "I")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*anorm < 0.f) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGTCON", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.f; + if (*n == 0) { + *rcond = 1.f; + return 0; + } else if (*anorm == 0.f) { + return 0; + } + +/* Check that D(1:N) is non-zero. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + if (d__[i__2].r == 0.f && d__[i__2].i == 0.f) { + return 0; + } +/* L10: */ + } + + ainvnm = 0.f; + if (onenrm) { + kase1 = 1; + } else { + kase1 = 2; + } + kase = 0; +L20: + clacn2_(n, &work[*n + 1], &work[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == kase1) { + +/* Multiply by inv(U)*inv(L). */ + + cgttrs_("No transpose", n, &c__1, &dl[1], &d__[1], &du[1], &du2[1] + , &ipiv[1], &work[1], n, info); + } else { + +/* Multiply by inv(L**H)*inv(U**H). */ + + cgttrs_("Conjugate transpose", n, &c__1, &dl[1], &d__[1], &du[1], + &du2[1], &ipiv[1], &work[1], n, info); + } + goto L20; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.f) { + *rcond = 1.f / ainvnm / *anorm; + } + + return 0; + +/* End of CGTCON */ + +} /* cgtcon_ */ + diff --git a/lapack-netlib/SRC/cgtrfs.c b/lapack-netlib/SRC/cgtrfs.c new file mode 100644 index 000000000..cdb99815b --- /dev/null +++ b/lapack-netlib/SRC/cgtrfs.c @@ -0,0 +1,1015 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CGTRFS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGTRFS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, */ +/* IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDB, LDX, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* REAL BERR( * ), FERR( * ), RWORK( * ) */ +/* COMPLEX B( LDB, * ), D( * ), DF( * ), DL( * ), */ +/* $ DLF( * ), DU( * ), DU2( * ), DUF( * ), */ +/* $ WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGTRFS improves the computed solution to a system of linear */ +/* > equations when the coefficient matrix is tridiagonal, and provides */ +/* > error bounds and backward error estimates for the solution. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DL */ +/* > \verbatim */ +/* > DL is COMPLEX array, dimension (N-1) */ +/* > The (n-1) subdiagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is COMPLEX array, dimension (N) */ +/* > The diagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU */ +/* > \verbatim */ +/* > DU is COMPLEX array, dimension (N-1) */ +/* > The (n-1) superdiagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DLF */ +/* > \verbatim */ +/* > DLF is COMPLEX array, dimension (N-1) */ +/* > The (n-1) multipliers that define the matrix L from the */ +/* > LU factorization of A as computed by CGTTRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DF */ +/* > \verbatim */ +/* > DF is COMPLEX array, dimension (N) */ +/* > The n diagonal elements of the upper triangular matrix U from */ +/* > the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DUF */ +/* > \verbatim */ +/* > DUF is COMPLEX array, dimension (N-1) */ +/* > The (n-1) elements of the first superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU2 */ +/* > \verbatim */ +/* > DU2 is COMPLEX array, dimension (N-2) */ +/* > The (n-2) elements of the second superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* > interchanged with row IPIV(i). IPIV(i) will always be either */ +/* > i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* > required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > The right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is COMPLEX array, dimension (LDX,NRHS) */ +/* > On entry, the solution matrix X, as computed by CGTTRS. */ +/* > On exit, the improved solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is REAL array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is REAL array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > ITMAX is the maximum number of steps of iterative refinement. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexGTcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int cgtrfs_(char *trans, integer *n, integer *nrhs, complex * + dl, complex *d__, complex *du, complex *dlf, complex *df, complex * + duf, complex *du2, integer *ipiv, complex *b, integer *ldb, complex * + x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, + integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5, + i__6, i__7, i__8, i__9; + real r__1, r__2, r__3, r__4, r__5, r__6, r__7, r__8, r__9, r__10, r__11, + r__12, r__13, r__14; + complex q__1; + + /* Local variables */ + integer kase; + real safe1, safe2; + integer i__, j; + real s; + extern logical lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *), caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *); + integer count; + extern /* Subroutine */ int clacn2_(integer *, complex *, complex *, real + *, integer *, integer *), clagtm_(char *, integer *, integer *, + real *, complex *, complex *, complex *, complex *, integer *, + real *, complex *, integer *); + integer nz; + extern real slamch_(char *); + real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical notran; + char transn[1]; + extern /* Subroutine */ int cgttrs_(char *, integer *, integer *, complex + *, complex *, complex *, complex *, integer *, complex *, integer + *, integer *); + char transt[1]; + real lstres, eps; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --dl; + --d__; + --du; + --dlf; + --df; + --duf; + --du2; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --rwork; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*ldb < f2cmax(1,*n)) { + *info = -13; + } else if (*ldx < f2cmax(1,*n)) { + *info = -15; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGTRFS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.f; + berr[j] = 0.f; +/* L10: */ + } + return 0; + } + + if (notran) { + *(unsigned char *)transn = 'N'; + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transn = 'C'; + *(unsigned char *)transt = 'N'; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = 4; + eps = slamch_("Epsilon"); + safmin = slamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.f; +L20: + +/* Loop until stopping criterion is satisfied. */ + +/* Compute residual R = B - op(A) * X, */ +/* where op(A) = A, A**T, or A**H, depending on TRANS. */ + + ccopy_(n, &b[j * b_dim1 + 1], &c__1, &work[1], &c__1); + clagtm_(trans, n, &c__1, &c_b18, &dl[1], &d__[1], &du[1], &x[j * + x_dim1 + 1], ldx, &c_b19, &work[1], n); + +/* Compute abs(op(A))*abs(x) + abs(b) for use in the backward */ +/* error bound. */ + + if (notran) { + if (*n == 1) { + i__2 = j * b_dim1 + 1; + i__3 = j * x_dim1 + 1; + rwork[1] = (r__1 = b[i__2].r, abs(r__1)) + (r__2 = r_imag(&b[ + j * b_dim1 + 1]), abs(r__2)) + ((r__3 = d__[1].r, abs( + r__3)) + (r__4 = r_imag(&d__[1]), abs(r__4))) * (( + r__5 = x[i__3].r, abs(r__5)) + (r__6 = r_imag(&x[j * + x_dim1 + 1]), abs(r__6))); + } else { + i__2 = j * b_dim1 + 1; + i__3 = j * x_dim1 + 1; + i__4 = j * x_dim1 + 2; + rwork[1] = (r__1 = b[i__2].r, abs(r__1)) + (r__2 = r_imag(&b[ + j * b_dim1 + 1]), abs(r__2)) + ((r__3 = d__[1].r, abs( + r__3)) + (r__4 = r_imag(&d__[1]), abs(r__4))) * (( + r__5 = x[i__3].r, abs(r__5)) + (r__6 = r_imag(&x[j * + x_dim1 + 1]), abs(r__6))) + ((r__7 = du[1].r, abs( + r__7)) + (r__8 = r_imag(&du[1]), abs(r__8))) * ((r__9 + = x[i__4].r, abs(r__9)) + (r__10 = r_imag(&x[j * + x_dim1 + 2]), abs(r__10))); + i__2 = *n - 1; + for (i__ = 2; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ - 1; + i__5 = i__ - 1 + j * x_dim1; + i__6 = i__; + i__7 = i__ + j * x_dim1; + i__8 = i__; + i__9 = i__ + 1 + j * x_dim1; + rwork[i__] = (r__1 = b[i__3].r, abs(r__1)) + (r__2 = + r_imag(&b[i__ + j * b_dim1]), abs(r__2)) + ((r__3 + = dl[i__4].r, abs(r__3)) + (r__4 = r_imag(&dl[i__ + - 1]), abs(r__4))) * ((r__5 = x[i__5].r, abs(r__5) + ) + (r__6 = r_imag(&x[i__ - 1 + j * x_dim1]), abs( + r__6))) + ((r__7 = d__[i__6].r, abs(r__7)) + ( + r__8 = r_imag(&d__[i__]), abs(r__8))) * ((r__9 = + x[i__7].r, abs(r__9)) + (r__10 = r_imag(&x[i__ + + j * x_dim1]), abs(r__10))) + ((r__11 = du[i__8].r, + abs(r__11)) + (r__12 = r_imag(&du[i__]), abs( + r__12))) * ((r__13 = x[i__9].r, abs(r__13)) + ( + r__14 = r_imag(&x[i__ + 1 + j * x_dim1]), abs( + r__14))); +/* L30: */ + } + i__2 = *n + j * b_dim1; + i__3 = *n - 1; + i__4 = *n - 1 + j * x_dim1; + i__5 = *n; + i__6 = *n + j * x_dim1; + rwork[*n] = (r__1 = b[i__2].r, abs(r__1)) + (r__2 = r_imag(&b[ + *n + j * b_dim1]), abs(r__2)) + ((r__3 = dl[i__3].r, + abs(r__3)) + (r__4 = r_imag(&dl[*n - 1]), abs(r__4))) + * ((r__5 = x[i__4].r, abs(r__5)) + (r__6 = r_imag(&x[* + n - 1 + j * x_dim1]), abs(r__6))) + ((r__7 = d__[i__5] + .r, abs(r__7)) + (r__8 = r_imag(&d__[*n]), abs(r__8))) + * ((r__9 = x[i__6].r, abs(r__9)) + (r__10 = r_imag(& + x[*n + j * x_dim1]), abs(r__10))); + } + } else { + if (*n == 1) { + i__2 = j * b_dim1 + 1; + i__3 = j * x_dim1 + 1; + rwork[1] = (r__1 = b[i__2].r, abs(r__1)) + (r__2 = r_imag(&b[ + j * b_dim1 + 1]), abs(r__2)) + ((r__3 = d__[1].r, abs( + r__3)) + (r__4 = r_imag(&d__[1]), abs(r__4))) * (( + r__5 = x[i__3].r, abs(r__5)) + (r__6 = r_imag(&x[j * + x_dim1 + 1]), abs(r__6))); + } else { + i__2 = j * b_dim1 + 1; + i__3 = j * x_dim1 + 1; + i__4 = j * x_dim1 + 2; + rwork[1] = (r__1 = b[i__2].r, abs(r__1)) + (r__2 = r_imag(&b[ + j * b_dim1 + 1]), abs(r__2)) + ((r__3 = d__[1].r, abs( + r__3)) + (r__4 = r_imag(&d__[1]), abs(r__4))) * (( + r__5 = x[i__3].r, abs(r__5)) + (r__6 = r_imag(&x[j * + x_dim1 + 1]), abs(r__6))) + ((r__7 = dl[1].r, abs( + r__7)) + (r__8 = r_imag(&dl[1]), abs(r__8))) * ((r__9 + = x[i__4].r, abs(r__9)) + (r__10 = r_imag(&x[j * + x_dim1 + 2]), abs(r__10))); + i__2 = *n - 1; + for (i__ = 2; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ - 1; + i__5 = i__ - 1 + j * x_dim1; + i__6 = i__; + i__7 = i__ + j * x_dim1; + i__8 = i__; + i__9 = i__ + 1 + j * x_dim1; + rwork[i__] = (r__1 = b[i__3].r, abs(r__1)) + (r__2 = + r_imag(&b[i__ + j * b_dim1]), abs(r__2)) + ((r__3 + = du[i__4].r, abs(r__3)) + (r__4 = r_imag(&du[i__ + - 1]), abs(r__4))) * ((r__5 = x[i__5].r, abs(r__5) + ) + (r__6 = r_imag(&x[i__ - 1 + j * x_dim1]), abs( + r__6))) + ((r__7 = d__[i__6].r, abs(r__7)) + ( + r__8 = r_imag(&d__[i__]), abs(r__8))) * ((r__9 = + x[i__7].r, abs(r__9)) + (r__10 = r_imag(&x[i__ + + j * x_dim1]), abs(r__10))) + ((r__11 = dl[i__8].r, + abs(r__11)) + (r__12 = r_imag(&dl[i__]), abs( + r__12))) * ((r__13 = x[i__9].r, abs(r__13)) + ( + r__14 = r_imag(&x[i__ + 1 + j * x_dim1]), abs( + r__14))); +/* L40: */ + } + i__2 = *n + j * b_dim1; + i__3 = *n - 1; + i__4 = *n - 1 + j * x_dim1; + i__5 = *n; + i__6 = *n + j * x_dim1; + rwork[*n] = (r__1 = b[i__2].r, abs(r__1)) + (r__2 = r_imag(&b[ + *n + j * b_dim1]), abs(r__2)) + ((r__3 = du[i__3].r, + abs(r__3)) + (r__4 = r_imag(&du[*n - 1]), abs(r__4))) + * ((r__5 = x[i__4].r, abs(r__5)) + (r__6 = r_imag(&x[* + n - 1 + j * x_dim1]), abs(r__6))) + ((r__7 = d__[i__5] + .r, abs(r__7)) + (r__8 = r_imag(&d__[*n]), abs(r__8))) + * ((r__9 = x[i__6].r, abs(r__9)) + (r__10 = r_imag(& + x[*n + j * x_dim1]), abs(r__10))); + } + } + +/* Compute componentwise relative backward error from formula */ + +/* f2cmax(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + s = 0.f; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (rwork[i__] > safe2) { +/* Computing MAX */ + i__3 = i__; + r__3 = s, r__4 = ((r__1 = work[i__3].r, abs(r__1)) + (r__2 = + r_imag(&work[i__]), abs(r__2))) / rwork[i__]; + s = f2cmax(r__3,r__4); + } else { +/* Computing MAX */ + i__3 = i__; + r__3 = s, r__4 = ((r__1 = work[i__3].r, abs(r__1)) + (r__2 = + r_imag(&work[i__]), abs(r__2)) + safe1) / (rwork[i__] + + safe1); + s = f2cmax(r__3,r__4); + } +/* L50: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried. */ + + if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) { + +/* Update solution and try again. */ + + cgttrs_(trans, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[ + 1], &work[1], n, info); + caxpy_(n, &c_b26, &work[1], &c__1, &x[j * x_dim1 + 1], &c__1); + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(op(A)))* */ +/* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(op(A)) is the inverse of op(A) */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ + +/* Use CLACN2 to estimate the infinity-norm of the matrix */ +/* inv(op(A)) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (rwork[i__] > safe2) { + i__3 = i__; + rwork[i__] = (r__1 = work[i__3].r, abs(r__1)) + (r__2 = + r_imag(&work[i__]), abs(r__2)) + nz * eps * rwork[i__] + ; + } else { + i__3 = i__; + rwork[i__] = (r__1 = work[i__3].r, abs(r__1)) + (r__2 = + r_imag(&work[i__]), abs(r__2)) + nz * eps * rwork[i__] + + safe1; + } +/* L60: */ + } + + kase = 0; +L70: + clacn2_(n, &work[*n + 1], &work[1], &ferr[j], &kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(op(A)**H). */ + + cgttrs_(transt, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], & + ipiv[1], &work[1], n, info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__; + q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] + * work[i__5].i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; +/* L80: */ + } + } else { + +/* Multiply by inv(op(A))*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__; + q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] + * work[i__5].i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; +/* L90: */ + } + cgttrs_(transn, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], & + ipiv[1], &work[1], n, info); + } + goto L70; + } + +/* Normalize error. */ + + lstres = 0.f; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + i__3 = i__ + j * x_dim1; + r__3 = lstres, r__4 = (r__1 = x[i__3].r, abs(r__1)) + (r__2 = + r_imag(&x[i__ + j * x_dim1]), abs(r__2)); + lstres = f2cmax(r__3,r__4); +/* L100: */ + } + if (lstres != 0.f) { + ferr[j] /= lstres; + } + +/* L110: */ + } + + return 0; + +/* End of CGTRFS */ + +} /* cgtrfs_ */ + diff --git a/lapack-netlib/SRC/cgtsv.c b/lapack-netlib/SRC/cgtsv.c new file mode 100644 index 000000000..7e47dce0c --- /dev/null +++ b/lapack-netlib/SRC/cgtsv.c @@ -0,0 +1,710 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CGTSV computes the solution to system of linear equations A * X = B for GT matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGTSV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) */ + +/* INTEGER INFO, LDB, N, NRHS */ +/* COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGTSV solves the equation */ +/* > */ +/* > A*X = B, */ +/* > */ +/* > where A is an N-by-N tridiagonal matrix, by Gaussian elimination with */ +/* > partial pivoting. */ +/* > */ +/* > Note that the equation A**T *X = B may be solved by interchanging the */ +/* > order of the arguments DU and DL. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DL */ +/* > \verbatim */ +/* > DL is COMPLEX array, dimension (N-1) */ +/* > On entry, DL must contain the (n-1) subdiagonal elements of */ +/* > A. */ +/* > On exit, DL is overwritten by the (n-2) elements of the */ +/* > second superdiagonal of the upper triangular matrix U from */ +/* > the LU factorization of A, in DL(1), ..., DL(n-2). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is COMPLEX array, dimension (N) */ +/* > On entry, D must contain the diagonal elements of A. */ +/* > On exit, D is overwritten by the n diagonal elements of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DU */ +/* > \verbatim */ +/* > DU is COMPLEX array, dimension (N-1) */ +/* > On entry, DU must contain the (n-1) superdiagonal elements */ +/* > of A. */ +/* > On exit, DU is overwritten by the (n-1) elements of the first */ +/* > superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero, and the solution */ +/* > has not been computed. The factorization has not been */ +/* > completed unless i = N. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexGTsolve */ + +/* ===================================================================== */ +/* Subroutine */ int cgtsv_(integer *n, integer *nrhs, complex *dl, complex * + d__, complex *du, complex *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; + real r__1, r__2, r__3, r__4; + complex q__1, q__2, q__3, q__4, q__5; + + /* Local variables */ + complex temp, mult; + integer j, k; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --dl; + --d__; + --du; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGTSV ", &i__1, (ftnlen)6); + return 0; + } + + if (*n == 0) { + return 0; + } + + i__1 = *n - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k; + if (dl[i__2].r == 0.f && dl[i__2].i == 0.f) { + +/* Subdiagonal is zero, no elimination is required. */ + + i__2 = k; + if (d__[i__2].r == 0.f && d__[i__2].i == 0.f) { + +/* Diagonal is zero: set INFO = K and return; a unique */ +/* solution can not be found. */ + + *info = k; + return 0; + } + } else /* if(complicated condition) */ { + i__2 = k; + i__3 = k; + if ((r__1 = d__[i__2].r, abs(r__1)) + (r__2 = r_imag(&d__[k]), + abs(r__2)) >= (r__3 = dl[i__3].r, abs(r__3)) + (r__4 = + r_imag(&dl[k]), abs(r__4))) { + +/* No row interchange required */ + + c_div(&q__1, &dl[k], &d__[k]); + mult.r = q__1.r, mult.i = q__1.i; + i__2 = k + 1; + i__3 = k + 1; + i__4 = k; + q__2.r = mult.r * du[i__4].r - mult.i * du[i__4].i, q__2.i = + mult.r * du[i__4].i + mult.i * du[i__4].r; + q__1.r = d__[i__3].r - q__2.r, q__1.i = d__[i__3].i - q__2.i; + d__[i__2].r = q__1.r, d__[i__2].i = q__1.i; + i__2 = *nrhs; + for (j = 1; j <= i__2; ++j) { + i__3 = k + 1 + j * b_dim1; + i__4 = k + 1 + j * b_dim1; + i__5 = k + j * b_dim1; + q__2.r = mult.r * b[i__5].r - mult.i * b[i__5].i, q__2.i = + mult.r * b[i__5].i + mult.i * b[i__5].r; + q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4].i - q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L10: */ + } + if (k < *n - 1) { + i__2 = k; + dl[i__2].r = 0.f, dl[i__2].i = 0.f; + } + } else { + +/* Interchange rows K and K+1 */ + + c_div(&q__1, &d__[k], &dl[k]); + mult.r = q__1.r, mult.i = q__1.i; + i__2 = k; + i__3 = k; + d__[i__2].r = dl[i__3].r, d__[i__2].i = dl[i__3].i; + i__2 = k + 1; + temp.r = d__[i__2].r, temp.i = d__[i__2].i; + i__2 = k + 1; + i__3 = k; + q__2.r = mult.r * temp.r - mult.i * temp.i, q__2.i = mult.r * + temp.i + mult.i * temp.r; + q__1.r = du[i__3].r - q__2.r, q__1.i = du[i__3].i - q__2.i; + d__[i__2].r = q__1.r, d__[i__2].i = q__1.i; + if (k < *n - 1) { + i__2 = k; + i__3 = k + 1; + dl[i__2].r = du[i__3].r, dl[i__2].i = du[i__3].i; + i__2 = k + 1; + q__2.r = -mult.r, q__2.i = -mult.i; + i__3 = k; + q__1.r = q__2.r * dl[i__3].r - q__2.i * dl[i__3].i, + q__1.i = q__2.r * dl[i__3].i + q__2.i * dl[i__3] + .r; + du[i__2].r = q__1.r, du[i__2].i = q__1.i; + } + i__2 = k; + du[i__2].r = temp.r, du[i__2].i = temp.i; + i__2 = *nrhs; + for (j = 1; j <= i__2; ++j) { + i__3 = k + j * b_dim1; + temp.r = b[i__3].r, temp.i = b[i__3].i; + i__3 = k + j * b_dim1; + i__4 = k + 1 + j * b_dim1; + b[i__3].r = b[i__4].r, b[i__3].i = b[i__4].i; + i__3 = k + 1 + j * b_dim1; + i__4 = k + 1 + j * b_dim1; + q__2.r = mult.r * b[i__4].r - mult.i * b[i__4].i, q__2.i = + mult.r * b[i__4].i + mult.i * b[i__4].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L20: */ + } + } + } +/* L30: */ + } + i__1 = *n; + if (d__[i__1].r == 0.f && d__[i__1].i == 0.f) { + *info = *n; + return 0; + } + +/* Back solve with the matrix U from the factorization. */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n + j * b_dim1; + c_div(&q__1, &b[*n + j * b_dim1], &d__[*n]); + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + if (*n > 1) { + i__2 = *n - 1 + j * b_dim1; + i__3 = *n - 1 + j * b_dim1; + i__4 = *n - 1; + i__5 = *n + j * b_dim1; + q__3.r = du[i__4].r * b[i__5].r - du[i__4].i * b[i__5].i, q__3.i = + du[i__4].r * b[i__5].i + du[i__4].i * b[i__5].r; + q__2.r = b[i__3].r - q__3.r, q__2.i = b[i__3].i - q__3.i; + c_div(&q__1, &q__2, &d__[*n - 1]); + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + } + for (k = *n - 2; k >= 1; --k) { + i__2 = k + j * b_dim1; + i__3 = k + j * b_dim1; + i__4 = k; + i__5 = k + 1 + j * b_dim1; + q__4.r = du[i__4].r * b[i__5].r - du[i__4].i * b[i__5].i, q__4.i = + du[i__4].r * b[i__5].i + du[i__4].i * b[i__5].r; + q__3.r = b[i__3].r - q__4.r, q__3.i = b[i__3].i - q__4.i; + i__6 = k; + i__7 = k + 2 + j * b_dim1; + q__5.r = dl[i__6].r * b[i__7].r - dl[i__6].i * b[i__7].i, q__5.i = + dl[i__6].r * b[i__7].i + dl[i__6].i * b[i__7].r; + q__2.r = q__3.r - q__5.r, q__2.i = q__3.i - q__5.i; + c_div(&q__1, &q__2, &d__[k]); + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L40: */ + } +/* L50: */ + } + + return 0; + +/* End of CGTSV */ + +} /* cgtsv_ */ + diff --git a/lapack-netlib/SRC/cgtsvx.c b/lapack-netlib/SRC/cgtsvx.c new file mode 100644 index 000000000..98d9fb73a --- /dev/null +++ b/lapack-netlib/SRC/cgtsvx.c @@ -0,0 +1,829 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CGTSVX computes the solution to system of linear equations A * X = B for GT matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGTSVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, */ +/* DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, */ +/* WORK, RWORK, INFO ) */ + +/* CHARACTER FACT, TRANS */ +/* INTEGER INFO, LDB, LDX, N, NRHS */ +/* REAL RCOND */ +/* INTEGER IPIV( * ) */ +/* REAL BERR( * ), FERR( * ), RWORK( * ) */ +/* COMPLEX B( LDB, * ), D( * ), DF( * ), DL( * ), */ +/* $ DLF( * ), DU( * ), DU2( * ), DUF( * ), */ +/* $ WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGTSVX uses the LU factorization to compute the solution to a complex */ +/* > system of linear equations A * X = B, A**T * X = B, or A**H * X = B, */ +/* > where A is a tridiagonal matrix of order N and X and B are N-by-NRHS */ +/* > matrices. */ +/* > */ +/* > Error bounds on the solution and a condition estimate are also */ +/* > provided. */ +/* > \endverbatim */ + +/* > \par Description: */ +/* ================= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The following steps are performed: */ +/* > */ +/* > 1. If FACT = 'N', the LU decomposition is used to factor the matrix A */ +/* > as A = L * U, where L is a product of permutation and unit lower */ +/* > bidiagonal matrices and U is upper triangular with nonzeros in */ +/* > only the main diagonal and first two superdiagonals. */ +/* > */ +/* > 2. If some U(i,i)=0, so that U is exactly singular, then the routine */ +/* > returns with INFO = i. Otherwise, the factored form of A is used */ +/* > to estimate the condition number of the matrix A. If the */ +/* > reciprocal of the condition number is less than machine precision, */ +/* > INFO = N+1 is returned as a warning, but the routine still goes on */ +/* > to solve for X and compute error bounds as described below. */ +/* > */ +/* > 3. The system of equations is solved for X using the factored form */ +/* > of A. */ +/* > */ +/* > 4. Iterative refinement is applied to improve the computed solution */ +/* > matrix and calculate error bounds and backward error estimates */ +/* > for it. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] FACT */ +/* > \verbatim */ +/* > FACT is CHARACTER*1 */ +/* > Specifies whether or not the factored form of A has been */ +/* > supplied on entry. */ +/* > = 'F': DLF, DF, DUF, DU2, and IPIV contain the factored form */ +/* > of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV will not */ +/* > be modified. */ +/* > = 'N': The matrix will be copied to DLF, DF, and DUF */ +/* > and factored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DL */ +/* > \verbatim */ +/* > DL is COMPLEX array, dimension (N-1) */ +/* > The (n-1) subdiagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is COMPLEX array, dimension (N) */ +/* > The n diagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU */ +/* > \verbatim */ +/* > DU is COMPLEX array, dimension (N-1) */ +/* > The (n-1) superdiagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DLF */ +/* > \verbatim */ +/* > DLF is COMPLEX array, dimension (N-1) */ +/* > If FACT = 'F', then DLF is an input argument and on entry */ +/* > contains the (n-1) multipliers that define the matrix L from */ +/* > the LU factorization of A as computed by CGTTRF. */ +/* > */ +/* > If FACT = 'N', then DLF is an output argument and on exit */ +/* > contains the (n-1) multipliers that define the matrix L from */ +/* > the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DF */ +/* > \verbatim */ +/* > DF is COMPLEX array, dimension (N) */ +/* > If FACT = 'F', then DF is an input argument and on entry */ +/* > contains the n diagonal elements of the upper triangular */ +/* > matrix U from the LU factorization of A. */ +/* > */ +/* > If FACT = 'N', then DF is an output argument and on exit */ +/* > contains the n diagonal elements of the upper triangular */ +/* > matrix U from the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DUF */ +/* > \verbatim */ +/* > DUF is COMPLEX array, dimension (N-1) */ +/* > If FACT = 'F', then DUF is an input argument and on entry */ +/* > contains the (n-1) elements of the first superdiagonal of U. */ +/* > */ +/* > If FACT = 'N', then DUF is an output argument and on exit */ +/* > contains the (n-1) elements of the first superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DU2 */ +/* > \verbatim */ +/* > DU2 is COMPLEX array, dimension (N-2) */ +/* > If FACT = 'F', then DU2 is an input argument and on entry */ +/* > contains the (n-2) elements of the second superdiagonal of */ +/* > U. */ +/* > */ +/* > If FACT = 'N', then DU2 is an output argument and on exit */ +/* > contains the (n-2) elements of the second superdiagonal of */ +/* > U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > If FACT = 'F', then IPIV is an input argument and on entry */ +/* > contains the pivot indices from the LU factorization of A as */ +/* > computed by CGTTRF. */ +/* > */ +/* > If FACT = 'N', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the LU factorization of A; */ +/* > row i of the matrix was interchanged with row IPIV(i). */ +/* > IPIV(i) will always be either i or i+1; IPIV(i) = i indicates */ +/* > a row interchange was not required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > The N-by-NRHS right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is COMPLEX array, dimension (LDX,NRHS) */ +/* > If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > The estimate of the reciprocal condition number of the matrix */ +/* > A. If RCOND is less than the machine precision (in */ +/* > particular, if RCOND = 0), the matrix is singular to working */ +/* > precision. This condition is indicated by a return code of */ +/* > INFO > 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is REAL array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is REAL array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is */ +/* > <= N: U(i,i) is exactly zero. The factorization */ +/* > has not been completed unless i = N, but the */ +/* > factor U is exactly singular, so the solution */ +/* > and error bounds could not be computed. */ +/* > RCOND = 0 is returned. */ +/* > = N+1: U is nonsingular, but RCOND is less than machine */ +/* > precision, meaning that the matrix is singular */ +/* > to working precision. Nevertheless, the */ +/* > solution and error bounds are computed because */ +/* > there are a number of situations where the */ +/* > computed solution can be more accurate than the */ +/* > value of RCOND would suggest. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexGTsolve */ + +/* ===================================================================== */ +/* Subroutine */ int cgtsvx_(char *fact, char *trans, integer *n, integer * + nrhs, complex *dl, complex *d__, complex *du, complex *dlf, complex * + df, complex *duf, complex *du2, integer *ipiv, complex *b, integer * + ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, + complex *work, real *rwork, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, x_dim1, x_offset, i__1; + + /* Local variables */ + char norm[1]; + extern logical lsame_(char *, char *); + real anorm; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *); + extern real slamch_(char *), clangt_(char *, integer *, complex *, + complex *, complex *); + logical nofact; + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *), cgtcon_(char *, + integer *, complex *, complex *, complex *, complex *, integer *, + real *, real *, complex *, integer *), xerbla_(char *, + integer *, ftnlen), cgtrfs_(char *, integer *, integer *, complex + *, complex *, complex *, complex *, complex *, complex *, complex + *, integer *, complex *, integer *, complex *, integer *, real *, + real *, complex *, real *, integer *), cgttrf_(integer *, + complex *, complex *, complex *, complex *, integer *, integer *); + logical notran; + extern /* Subroutine */ int cgttrs_(char *, integer *, integer *, complex + *, complex *, complex *, complex *, integer *, complex *, integer + *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --dl; + --d__; + --du; + --dlf; + --df; + --duf; + --du2; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --rwork; + + /* Function Body */ + *info = 0; + nofact = lsame_(fact, "N"); + notran = lsame_(trans, "N"); + if (! nofact && ! lsame_(fact, "F")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*ldb < f2cmax(1,*n)) { + *info = -14; + } else if (*ldx < f2cmax(1,*n)) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGTSVX", &i__1, (ftnlen)6); + return 0; + } + + if (nofact) { + +/* Compute the LU factorization of A. */ + + ccopy_(n, &d__[1], &c__1, &df[1], &c__1); + if (*n > 1) { + i__1 = *n - 1; + ccopy_(&i__1, &dl[1], &c__1, &dlf[1], &c__1); + i__1 = *n - 1; + ccopy_(&i__1, &du[1], &c__1, &duf[1], &c__1); + } + cgttrf_(n, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + *rcond = 0.f; + return 0; + } + } + +/* Compute the norm of the matrix A. */ + + if (notran) { + *(unsigned char *)norm = '1'; + } else { + *(unsigned char *)norm = 'I'; + } + anorm = clangt_(norm, n, &dl[1], &d__[1], &du[1]); + +/* Compute the reciprocal of the condition number of A. */ + + cgtcon_(norm, n, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &anorm, + rcond, &work[1], info); + +/* Compute the solution vectors X. */ + + clacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + cgttrs_(trans, n, nrhs, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &x[ + x_offset], ldx, info); + +/* Use iterative refinement to improve the computed solutions and */ +/* compute error bounds and backward error estimates for them. */ + + cgtrfs_(trans, n, nrhs, &dl[1], &d__[1], &du[1], &dlf[1], &df[1], &duf[1], + &du2[1], &ipiv[1], &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1] + , &berr[1], &work[1], &rwork[1], info); + +/* Set INFO = N+1 if the matrix is singular to working precision. */ + + if (*rcond < slamch_("Epsilon")) { + *info = *n + 1; + } + + return 0; + +/* End of CGTSVX */ + +} /* cgtsvx_ */ + diff --git a/lapack-netlib/SRC/cgttrf.c b/lapack-netlib/SRC/cgttrf.c new file mode 100644 index 000000000..418dfc8e5 --- /dev/null +++ b/lapack-netlib/SRC/cgttrf.c @@ -0,0 +1,695 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CGTTRF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGTTRF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) */ + +/* INTEGER INFO, N */ +/* INTEGER IPIV( * ) */ +/* COMPLEX D( * ), DL( * ), DU( * ), DU2( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGTTRF computes an LU factorization of a complex tridiagonal matrix A */ +/* > using elimination with partial pivoting and row interchanges. */ +/* > */ +/* > The factorization has the form */ +/* > A = L * U */ +/* > where L is a product of permutation and unit lower bidiagonal */ +/* > matrices and U is upper triangular with nonzeros in only the main */ +/* > diagonal and first two superdiagonals. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DL */ +/* > \verbatim */ +/* > DL is COMPLEX array, dimension (N-1) */ +/* > On entry, DL must contain the (n-1) sub-diagonal elements of */ +/* > A. */ +/* > */ +/* > On exit, DL is overwritten by the (n-1) multipliers that */ +/* > define the matrix L from the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is COMPLEX array, dimension (N) */ +/* > On entry, D must contain the diagonal elements of A. */ +/* > */ +/* > On exit, D is overwritten by the n diagonal elements of the */ +/* > upper triangular matrix U from the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DU */ +/* > \verbatim */ +/* > DU is COMPLEX array, dimension (N-1) */ +/* > On entry, DU must contain the (n-1) super-diagonal elements */ +/* > of A. */ +/* > */ +/* > On exit, DU is overwritten by the (n-1) elements of the first */ +/* > super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DU2 */ +/* > \verbatim */ +/* > DU2 is COMPLEX array, dimension (N-2) */ +/* > On exit, DU2 is overwritten by the (n-2) elements of the */ +/* > second super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* > interchanged with row IPIV(i). IPIV(i) will always be either */ +/* > i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* > required. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > > 0: if INFO = k, U(k,k) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexGTcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int cgttrf_(integer *n, complex *dl, complex *d__, complex * + du, complex *du2, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + real r__1, r__2, r__3, r__4; + complex q__1, q__2; + + /* Local variables */ + complex fact, temp; + integer i__; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --ipiv; + --du2; + --du; + --d__; + --dl; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + i__1 = -(*info); + xerbla_("CGTTRF", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Initialize IPIV(i) = i and DU2(i) = 0 */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ipiv[i__] = i__; +/* L10: */ + } + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + du2[i__2].r = 0.f, du2[i__2].i = 0.f; +/* L20: */ + } + + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + if ((r__1 = d__[i__2].r, abs(r__1)) + (r__2 = r_imag(&d__[i__]), abs( + r__2)) >= (r__3 = dl[i__3].r, abs(r__3)) + (r__4 = r_imag(&dl[ + i__]), abs(r__4))) { + +/* No row interchange required, eliminate DL(I) */ + + i__2 = i__; + if ((r__1 = d__[i__2].r, abs(r__1)) + (r__2 = r_imag(&d__[i__]), + abs(r__2)) != 0.f) { + c_div(&q__1, &dl[i__], &d__[i__]); + fact.r = q__1.r, fact.i = q__1.i; + i__2 = i__; + dl[i__2].r = fact.r, dl[i__2].i = fact.i; + i__2 = i__ + 1; + i__3 = i__ + 1; + i__4 = i__; + q__2.r = fact.r * du[i__4].r - fact.i * du[i__4].i, q__2.i = + fact.r * du[i__4].i + fact.i * du[i__4].r; + q__1.r = d__[i__3].r - q__2.r, q__1.i = d__[i__3].i - q__2.i; + d__[i__2].r = q__1.r, d__[i__2].i = q__1.i; + } + } else { + +/* Interchange rows I and I+1, eliminate DL(I) */ + + c_div(&q__1, &d__[i__], &dl[i__]); + fact.r = q__1.r, fact.i = q__1.i; + i__2 = i__; + i__3 = i__; + d__[i__2].r = dl[i__3].r, d__[i__2].i = dl[i__3].i; + i__2 = i__; + dl[i__2].r = fact.r, dl[i__2].i = fact.i; + i__2 = i__; + temp.r = du[i__2].r, temp.i = du[i__2].i; + i__2 = i__; + i__3 = i__ + 1; + du[i__2].r = d__[i__3].r, du[i__2].i = d__[i__3].i; + i__2 = i__ + 1; + i__3 = i__ + 1; + q__2.r = fact.r * d__[i__3].r - fact.i * d__[i__3].i, q__2.i = + fact.r * d__[i__3].i + fact.i * d__[i__3].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - q__2.i; + d__[i__2].r = q__1.r, d__[i__2].i = q__1.i; + i__2 = i__; + i__3 = i__ + 1; + du2[i__2].r = du[i__3].r, du2[i__2].i = du[i__3].i; + i__2 = i__ + 1; + q__2.r = -fact.r, q__2.i = -fact.i; + i__3 = i__ + 1; + q__1.r = q__2.r * du[i__3].r - q__2.i * du[i__3].i, q__1.i = + q__2.r * du[i__3].i + q__2.i * du[i__3].r; + du[i__2].r = q__1.r, du[i__2].i = q__1.i; + ipiv[i__] = i__ + 1; + } +/* L30: */ + } + if (*n > 1) { + i__ = *n - 1; + i__1 = i__; + i__2 = i__; + if ((r__1 = d__[i__1].r, abs(r__1)) + (r__2 = r_imag(&d__[i__]), abs( + r__2)) >= (r__3 = dl[i__2].r, abs(r__3)) + (r__4 = r_imag(&dl[ + i__]), abs(r__4))) { + i__1 = i__; + if ((r__1 = d__[i__1].r, abs(r__1)) + (r__2 = r_imag(&d__[i__]), + abs(r__2)) != 0.f) { + c_div(&q__1, &dl[i__], &d__[i__]); + fact.r = q__1.r, fact.i = q__1.i; + i__1 = i__; + dl[i__1].r = fact.r, dl[i__1].i = fact.i; + i__1 = i__ + 1; + i__2 = i__ + 1; + i__3 = i__; + q__2.r = fact.r * du[i__3].r - fact.i * du[i__3].i, q__2.i = + fact.r * du[i__3].i + fact.i * du[i__3].r; + q__1.r = d__[i__2].r - q__2.r, q__1.i = d__[i__2].i - q__2.i; + d__[i__1].r = q__1.r, d__[i__1].i = q__1.i; + } + } else { + c_div(&q__1, &d__[i__], &dl[i__]); + fact.r = q__1.r, fact.i = q__1.i; + i__1 = i__; + i__2 = i__; + d__[i__1].r = dl[i__2].r, d__[i__1].i = dl[i__2].i; + i__1 = i__; + dl[i__1].r = fact.r, dl[i__1].i = fact.i; + i__1 = i__; + temp.r = du[i__1].r, temp.i = du[i__1].i; + i__1 = i__; + i__2 = i__ + 1; + du[i__1].r = d__[i__2].r, du[i__1].i = d__[i__2].i; + i__1 = i__ + 1; + i__2 = i__ + 1; + q__2.r = fact.r * d__[i__2].r - fact.i * d__[i__2].i, q__2.i = + fact.r * d__[i__2].i + fact.i * d__[i__2].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - q__2.i; + d__[i__1].r = q__1.r, d__[i__1].i = q__1.i; + ipiv[i__] = i__ + 1; + } + } + +/* Check for a zero on the diagonal of U. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + if ((r__1 = d__[i__2].r, abs(r__1)) + (r__2 = r_imag(&d__[i__]), abs( + r__2)) == 0.f) { + *info = i__; + goto L50; + } +/* L40: */ + } +L50: + + return 0; + +/* End of CGTTRF */ + +} /* cgttrf_ */ + diff --git a/lapack-netlib/SRC/cgttrs.c b/lapack-netlib/SRC/cgttrs.c new file mode 100644 index 000000000..5ea811672 --- /dev/null +++ b/lapack-netlib/SRC/cgttrs.c @@ -0,0 +1,637 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CGTTRS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGTTRS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDB, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGTTRS solves one of the systems of equations */ +/* > A * X = B, A**T * X = B, or A**H * X = B, */ +/* > with a tridiagonal matrix A using the LU factorization computed */ +/* > by CGTTRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations. */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DL */ +/* > \verbatim */ +/* > DL is COMPLEX array, dimension (N-1) */ +/* > The (n-1) multipliers that define the matrix L from the */ +/* > LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is COMPLEX array, dimension (N) */ +/* > The n diagonal elements of the upper triangular matrix U from */ +/* > the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU */ +/* > \verbatim */ +/* > DU is COMPLEX array, dimension (N-1) */ +/* > The (n-1) elements of the first super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU2 */ +/* > \verbatim */ +/* > DU2 is COMPLEX array, dimension (N-2) */ +/* > The (n-2) elements of the second super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* > interchanged with row IPIV(i). IPIV(i) will always be either */ +/* > i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* > required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > On entry, the matrix of right hand side vectors B. */ +/* > On exit, B is overwritten by the solution vectors X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexGTcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int cgttrs_(char *trans, integer *n, integer *nrhs, complex * + dl, complex *d__, complex *du, complex *du2, integer *ipiv, complex * + b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer j; + extern /* Subroutine */ int cgtts2_(integer *, integer *, integer *, + complex *, complex *, complex *, complex *, integer *, complex *, + integer *); + integer jb, nb; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer itrans; + logical notran; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --dl; + --d__; + --du; + --du2; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = *(unsigned char *)trans == 'N' || *(unsigned char *)trans == 'n'; + if (! notran && ! (*(unsigned char *)trans == 'T' || *(unsigned char *) + trans == 't') && ! (*(unsigned char *)trans == 'C' || *(unsigned + char *)trans == 'c')) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*ldb < f2cmax(*n,1)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGTTRS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + +/* Decode TRANS */ + + if (notran) { + itrans = 0; + } else if (*(unsigned char *)trans == 'T' || *(unsigned char *)trans == + 't') { + itrans = 1; + } else { + itrans = 2; + } + +/* Determine the number of right-hand sides to solve at a time. */ + + if (*nrhs == 1) { + nb = 1; + } else { +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, "CGTTRS", trans, n, nrhs, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nb = f2cmax(i__1,i__2); + } + + if (nb >= *nrhs) { + cgtts2_(&itrans, n, nrhs, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[1], + &b[b_offset], ldb); + } else { + i__1 = *nrhs; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = *nrhs - j + 1; + jb = f2cmin(i__3,nb); + cgtts2_(&itrans, n, &jb, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[ + 1], &b[j * b_dim1 + 1], ldb); +/* L10: */ + } + } + +/* End of CGTTRS */ + + return 0; +} /* cgttrs_ */ + diff --git a/lapack-netlib/SRC/cgtts2.c b/lapack-netlib/SRC/cgtts2.c new file mode 100644 index 000000000..8e750dbbc --- /dev/null +++ b/lapack-netlib/SRC/cgtts2.c @@ -0,0 +1,1023 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CGTTS2 solves a system of linear equations with a tridiagonal matrix using the LU factorization + computed by sgttrf. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CGTTS2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) */ + +/* INTEGER ITRANS, LDB, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CGTTS2 solves one of the systems of equations */ +/* > A * X = B, A**T * X = B, or A**H * X = B, */ +/* > with a tridiagonal matrix A using the LU factorization computed */ +/* > by CGTTRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITRANS */ +/* > \verbatim */ +/* > ITRANS is INTEGER */ +/* > Specifies the form of the system of equations. */ +/* > = 0: A * X = B (No transpose) */ +/* > = 1: A**T * X = B (Transpose) */ +/* > = 2: A**H * X = B (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DL */ +/* > \verbatim */ +/* > DL is COMPLEX array, dimension (N-1) */ +/* > The (n-1) multipliers that define the matrix L from the */ +/* > LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is COMPLEX array, dimension (N) */ +/* > The n diagonal elements of the upper triangular matrix U from */ +/* > the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU */ +/* > \verbatim */ +/* > DU is COMPLEX array, dimension (N-1) */ +/* > The (n-1) elements of the first super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU2 */ +/* > \verbatim */ +/* > DU2 is COMPLEX array, dimension (N-2) */ +/* > The (n-2) elements of the second super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* > interchanged with row IPIV(i). IPIV(i) will always be either */ +/* > i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* > required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > On entry, the matrix of right hand side vectors B. */ +/* > On exit, B is overwritten by the solution vectors X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexGTcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int cgtts2_(integer *itrans, integer *n, integer *nrhs, + complex *dl, complex *d__, complex *du, complex *du2, integer *ipiv, + complex *b, integer *ldb) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; + complex q__1, q__2, q__3, q__4, q__5, q__6, q__7, q__8; + + /* Local variables */ + complex temp; + integer i__, j; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Quick return if possible */ + + /* Parameter adjustments */ + --dl; + --d__; + --du; + --du2; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (*itrans == 0) { + +/* Solve A*X = B using the LU factorization of A, */ +/* overwriting each right hand side vector with its solution. */ + + if (*nrhs <= 1) { + j = 1; +L10: + +/* Solve L*x = b. */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (ipiv[i__] == i__) { + i__2 = i__ + 1 + j * b_dim1; + i__3 = i__ + 1 + j * b_dim1; + i__4 = i__; + i__5 = i__ + j * b_dim1; + q__2.r = dl[i__4].r * b[i__5].r - dl[i__4].i * b[i__5].i, + q__2.i = dl[i__4].r * b[i__5].i + dl[i__4].i * b[ + i__5].r; + q__1.r = b[i__3].r - q__2.r, q__1.i = b[i__3].i - q__2.i; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + } else { + i__2 = i__ + j * b_dim1; + temp.r = b[i__2].r, temp.i = b[i__2].i; + i__2 = i__ + j * b_dim1; + i__3 = i__ + 1 + j * b_dim1; + b[i__2].r = b[i__3].r, b[i__2].i = b[i__3].i; + i__2 = i__ + 1 + j * b_dim1; + i__3 = i__; + i__4 = i__ + j * b_dim1; + q__2.r = dl[i__3].r * b[i__4].r - dl[i__3].i * b[i__4].i, + q__2.i = dl[i__3].r * b[i__4].i + dl[i__3].i * b[ + i__4].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - q__2.i; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + } +/* L20: */ + } + +/* Solve U*x = b. */ + + i__1 = *n + j * b_dim1; + c_div(&q__1, &b[*n + j * b_dim1], &d__[*n]); + b[i__1].r = q__1.r, b[i__1].i = q__1.i; + if (*n > 1) { + i__1 = *n - 1 + j * b_dim1; + i__2 = *n - 1 + j * b_dim1; + i__3 = *n - 1; + i__4 = *n + j * b_dim1; + q__3.r = du[i__3].r * b[i__4].r - du[i__3].i * b[i__4].i, + q__3.i = du[i__3].r * b[i__4].i + du[i__3].i * b[i__4] + .r; + q__2.r = b[i__2].r - q__3.r, q__2.i = b[i__2].i - q__3.i; + c_div(&q__1, &q__2, &d__[*n - 1]); + b[i__1].r = q__1.r, b[i__1].i = q__1.i; + } + for (i__ = *n - 2; i__ >= 1; --i__) { + i__1 = i__ + j * b_dim1; + i__2 = i__ + j * b_dim1; + i__3 = i__; + i__4 = i__ + 1 + j * b_dim1; + q__4.r = du[i__3].r * b[i__4].r - du[i__3].i * b[i__4].i, + q__4.i = du[i__3].r * b[i__4].i + du[i__3].i * b[i__4] + .r; + q__3.r = b[i__2].r - q__4.r, q__3.i = b[i__2].i - q__4.i; + i__5 = i__; + i__6 = i__ + 2 + j * b_dim1; + q__5.r = du2[i__5].r * b[i__6].r - du2[i__5].i * b[i__6].i, + q__5.i = du2[i__5].r * b[i__6].i + du2[i__5].i * b[ + i__6].r; + q__2.r = q__3.r - q__5.r, q__2.i = q__3.i - q__5.i; + c_div(&q__1, &q__2, &d__[i__]); + b[i__1].r = q__1.r, b[i__1].i = q__1.i; +/* L30: */ + } + if (j < *nrhs) { + ++j; + goto L10; + } + } else { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Solve L*x = b. */ + + i__2 = *n - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (ipiv[i__] == i__) { + i__3 = i__ + 1 + j * b_dim1; + i__4 = i__ + 1 + j * b_dim1; + i__5 = i__; + i__6 = i__ + j * b_dim1; + q__2.r = dl[i__5].r * b[i__6].r - dl[i__5].i * b[i__6] + .i, q__2.i = dl[i__5].r * b[i__6].i + dl[i__5] + .i * b[i__6].r; + q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4].i - + q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; + } else { + i__3 = i__ + j * b_dim1; + temp.r = b[i__3].r, temp.i = b[i__3].i; + i__3 = i__ + j * b_dim1; + i__4 = i__ + 1 + j * b_dim1; + b[i__3].r = b[i__4].r, b[i__3].i = b[i__4].i; + i__3 = i__ + 1 + j * b_dim1; + i__4 = i__; + i__5 = i__ + j * b_dim1; + q__2.r = dl[i__4].r * b[i__5].r - dl[i__4].i * b[i__5] + .i, q__2.i = dl[i__4].r * b[i__5].i + dl[i__4] + .i * b[i__5].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; + } +/* L40: */ + } + +/* Solve U*x = b. */ + + i__2 = *n + j * b_dim1; + c_div(&q__1, &b[*n + j * b_dim1], &d__[*n]); + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + if (*n > 1) { + i__2 = *n - 1 + j * b_dim1; + i__3 = *n - 1 + j * b_dim1; + i__4 = *n - 1; + i__5 = *n + j * b_dim1; + q__3.r = du[i__4].r * b[i__5].r - du[i__4].i * b[i__5].i, + q__3.i = du[i__4].r * b[i__5].i + du[i__4].i * b[ + i__5].r; + q__2.r = b[i__3].r - q__3.r, q__2.i = b[i__3].i - q__3.i; + c_div(&q__1, &q__2, &d__[*n - 1]); + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + } + for (i__ = *n - 2; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + i__4 = i__; + i__5 = i__ + 1 + j * b_dim1; + q__4.r = du[i__4].r * b[i__5].r - du[i__4].i * b[i__5].i, + q__4.i = du[i__4].r * b[i__5].i + du[i__4].i * b[ + i__5].r; + q__3.r = b[i__3].r - q__4.r, q__3.i = b[i__3].i - q__4.i; + i__6 = i__; + i__7 = i__ + 2 + j * b_dim1; + q__5.r = du2[i__6].r * b[i__7].r - du2[i__6].i * b[i__7] + .i, q__5.i = du2[i__6].r * b[i__7].i + du2[i__6] + .i * b[i__7].r; + q__2.r = q__3.r - q__5.r, q__2.i = q__3.i - q__5.i; + c_div(&q__1, &q__2, &d__[i__]); + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L50: */ + } +/* L60: */ + } + } + } else if (*itrans == 1) { + +/* Solve A**T * X = B. */ + + if (*nrhs <= 1) { + j = 1; +L70: + +/* Solve U**T * x = b. */ + + i__1 = j * b_dim1 + 1; + c_div(&q__1, &b[j * b_dim1 + 1], &d__[1]); + b[i__1].r = q__1.r, b[i__1].i = q__1.i; + if (*n > 1) { + i__1 = j * b_dim1 + 2; + i__2 = j * b_dim1 + 2; + i__3 = j * b_dim1 + 1; + q__3.r = du[1].r * b[i__3].r - du[1].i * b[i__3].i, q__3.i = + du[1].r * b[i__3].i + du[1].i * b[i__3].r; + q__2.r = b[i__2].r - q__3.r, q__2.i = b[i__2].i - q__3.i; + c_div(&q__1, &q__2, &d__[2]); + b[i__1].r = q__1.r, b[i__1].i = q__1.i; + } + i__1 = *n; + for (i__ = 3; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + i__4 = i__ - 1; + i__5 = i__ - 1 + j * b_dim1; + q__4.r = du[i__4].r * b[i__5].r - du[i__4].i * b[i__5].i, + q__4.i = du[i__4].r * b[i__5].i + du[i__4].i * b[i__5] + .r; + q__3.r = b[i__3].r - q__4.r, q__3.i = b[i__3].i - q__4.i; + i__6 = i__ - 2; + i__7 = i__ - 2 + j * b_dim1; + q__5.r = du2[i__6].r * b[i__7].r - du2[i__6].i * b[i__7].i, + q__5.i = du2[i__6].r * b[i__7].i + du2[i__6].i * b[ + i__7].r; + q__2.r = q__3.r - q__5.r, q__2.i = q__3.i - q__5.i; + c_div(&q__1, &q__2, &d__[i__]); + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L80: */ + } + +/* Solve L**T * x = b. */ + + for (i__ = *n - 1; i__ >= 1; --i__) { + if (ipiv[i__] == i__) { + i__1 = i__ + j * b_dim1; + i__2 = i__ + j * b_dim1; + i__3 = i__; + i__4 = i__ + 1 + j * b_dim1; + q__2.r = dl[i__3].r * b[i__4].r - dl[i__3].i * b[i__4].i, + q__2.i = dl[i__3].r * b[i__4].i + dl[i__3].i * b[ + i__4].r; + q__1.r = b[i__2].r - q__2.r, q__1.i = b[i__2].i - q__2.i; + b[i__1].r = q__1.r, b[i__1].i = q__1.i; + } else { + i__1 = i__ + 1 + j * b_dim1; + temp.r = b[i__1].r, temp.i = b[i__1].i; + i__1 = i__ + 1 + j * b_dim1; + i__2 = i__ + j * b_dim1; + i__3 = i__; + q__2.r = dl[i__3].r * temp.r - dl[i__3].i * temp.i, + q__2.i = dl[i__3].r * temp.i + dl[i__3].i * + temp.r; + q__1.r = b[i__2].r - q__2.r, q__1.i = b[i__2].i - q__2.i; + b[i__1].r = q__1.r, b[i__1].i = q__1.i; + i__1 = i__ + j * b_dim1; + b[i__1].r = temp.r, b[i__1].i = temp.i; + } +/* L90: */ + } + if (j < *nrhs) { + ++j; + goto L70; + } + } else { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Solve U**T * x = b. */ + + i__2 = j * b_dim1 + 1; + c_div(&q__1, &b[j * b_dim1 + 1], &d__[1]); + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + if (*n > 1) { + i__2 = j * b_dim1 + 2; + i__3 = j * b_dim1 + 2; + i__4 = j * b_dim1 + 1; + q__3.r = du[1].r * b[i__4].r - du[1].i * b[i__4].i, + q__3.i = du[1].r * b[i__4].i + du[1].i * b[i__4] + .r; + q__2.r = b[i__3].r - q__3.r, q__2.i = b[i__3].i - q__3.i; + c_div(&q__1, &q__2, &d__[2]); + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + } + i__2 = *n; + for (i__ = 3; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ - 1; + i__6 = i__ - 1 + j * b_dim1; + q__4.r = du[i__5].r * b[i__6].r - du[i__5].i * b[i__6].i, + q__4.i = du[i__5].r * b[i__6].i + du[i__5].i * b[ + i__6].r; + q__3.r = b[i__4].r - q__4.r, q__3.i = b[i__4].i - q__4.i; + i__7 = i__ - 2; + i__8 = i__ - 2 + j * b_dim1; + q__5.r = du2[i__7].r * b[i__8].r - du2[i__7].i * b[i__8] + .i, q__5.i = du2[i__7].r * b[i__8].i + du2[i__7] + .i * b[i__8].r; + q__2.r = q__3.r - q__5.r, q__2.i = q__3.i - q__5.i; + c_div(&q__1, &q__2, &d__[i__]); + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L100: */ + } + +/* Solve L**T * x = b. */ + + for (i__ = *n - 1; i__ >= 1; --i__) { + if (ipiv[i__] == i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + i__4 = i__; + i__5 = i__ + 1 + j * b_dim1; + q__2.r = dl[i__4].r * b[i__5].r - dl[i__4].i * b[i__5] + .i, q__2.i = dl[i__4].r * b[i__5].i + dl[i__4] + .i * b[i__5].r; + q__1.r = b[i__3].r - q__2.r, q__1.i = b[i__3].i - + q__2.i; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + } else { + i__2 = i__ + 1 + j * b_dim1; + temp.r = b[i__2].r, temp.i = b[i__2].i; + i__2 = i__ + 1 + j * b_dim1; + i__3 = i__ + j * b_dim1; + i__4 = i__; + q__2.r = dl[i__4].r * temp.r - dl[i__4].i * temp.i, + q__2.i = dl[i__4].r * temp.i + dl[i__4].i * + temp.r; + q__1.r = b[i__3].r - q__2.r, q__1.i = b[i__3].i - + q__2.i; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + i__2 = i__ + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; + } +/* L110: */ + } +/* L120: */ + } + } + } else { + +/* Solve A**H * X = B. */ + + if (*nrhs <= 1) { + j = 1; +L130: + +/* Solve U**H * x = b. */ + + i__1 = j * b_dim1 + 1; + r_cnjg(&q__2, &d__[1]); + c_div(&q__1, &b[j * b_dim1 + 1], &q__2); + b[i__1].r = q__1.r, b[i__1].i = q__1.i; + if (*n > 1) { + i__1 = j * b_dim1 + 2; + i__2 = j * b_dim1 + 2; + r_cnjg(&q__4, &du[1]); + i__3 = j * b_dim1 + 1; + q__3.r = q__4.r * b[i__3].r - q__4.i * b[i__3].i, q__3.i = + q__4.r * b[i__3].i + q__4.i * b[i__3].r; + q__2.r = b[i__2].r - q__3.r, q__2.i = b[i__2].i - q__3.i; + r_cnjg(&q__5, &d__[2]); + c_div(&q__1, &q__2, &q__5); + b[i__1].r = q__1.r, b[i__1].i = q__1.i; + } + i__1 = *n; + for (i__ = 3; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + r_cnjg(&q__5, &du[i__ - 1]); + i__4 = i__ - 1 + j * b_dim1; + q__4.r = q__5.r * b[i__4].r - q__5.i * b[i__4].i, q__4.i = + q__5.r * b[i__4].i + q__5.i * b[i__4].r; + q__3.r = b[i__3].r - q__4.r, q__3.i = b[i__3].i - q__4.i; + r_cnjg(&q__7, &du2[i__ - 2]); + i__5 = i__ - 2 + j * b_dim1; + q__6.r = q__7.r * b[i__5].r - q__7.i * b[i__5].i, q__6.i = + q__7.r * b[i__5].i + q__7.i * b[i__5].r; + q__2.r = q__3.r - q__6.r, q__2.i = q__3.i - q__6.i; + r_cnjg(&q__8, &d__[i__]); + c_div(&q__1, &q__2, &q__8); + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L140: */ + } + +/* Solve L**H * x = b. */ + + for (i__ = *n - 1; i__ >= 1; --i__) { + if (ipiv[i__] == i__) { + i__1 = i__ + j * b_dim1; + i__2 = i__ + j * b_dim1; + r_cnjg(&q__3, &dl[i__]); + i__3 = i__ + 1 + j * b_dim1; + q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3].i, q__2.i = + q__3.r * b[i__3].i + q__3.i * b[i__3].r; + q__1.r = b[i__2].r - q__2.r, q__1.i = b[i__2].i - q__2.i; + b[i__1].r = q__1.r, b[i__1].i = q__1.i; + } else { + i__1 = i__ + 1 + j * b_dim1; + temp.r = b[i__1].r, temp.i = b[i__1].i; + i__1 = i__ + 1 + j * b_dim1; + i__2 = i__ + j * b_dim1; + r_cnjg(&q__3, &dl[i__]); + q__2.r = q__3.r * temp.r - q__3.i * temp.i, q__2.i = + q__3.r * temp.i + q__3.i * temp.r; + q__1.r = b[i__2].r - q__2.r, q__1.i = b[i__2].i - q__2.i; + b[i__1].r = q__1.r, b[i__1].i = q__1.i; + i__1 = i__ + j * b_dim1; + b[i__1].r = temp.r, b[i__1].i = temp.i; + } +/* L150: */ + } + if (j < *nrhs) { + ++j; + goto L130; + } + } else { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Solve U**H * x = b. */ + + i__2 = j * b_dim1 + 1; + r_cnjg(&q__2, &d__[1]); + c_div(&q__1, &b[j * b_dim1 + 1], &q__2); + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + if (*n > 1) { + i__2 = j * b_dim1 + 2; + i__3 = j * b_dim1 + 2; + r_cnjg(&q__4, &du[1]); + i__4 = j * b_dim1 + 1; + q__3.r = q__4.r * b[i__4].r - q__4.i * b[i__4].i, q__3.i = + q__4.r * b[i__4].i + q__4.i * b[i__4].r; + q__2.r = b[i__3].r - q__3.r, q__2.i = b[i__3].i - q__3.i; + r_cnjg(&q__5, &d__[2]); + c_div(&q__1, &q__2, &q__5); + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + } + i__2 = *n; + for (i__ = 3; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + r_cnjg(&q__5, &du[i__ - 1]); + i__5 = i__ - 1 + j * b_dim1; + q__4.r = q__5.r * b[i__5].r - q__5.i * b[i__5].i, q__4.i = + q__5.r * b[i__5].i + q__5.i * b[i__5].r; + q__3.r = b[i__4].r - q__4.r, q__3.i = b[i__4].i - q__4.i; + r_cnjg(&q__7, &du2[i__ - 2]); + i__6 = i__ - 2 + j * b_dim1; + q__6.r = q__7.r * b[i__6].r - q__7.i * b[i__6].i, q__6.i = + q__7.r * b[i__6].i + q__7.i * b[i__6].r; + q__2.r = q__3.r - q__6.r, q__2.i = q__3.i - q__6.i; + r_cnjg(&q__8, &d__[i__]); + c_div(&q__1, &q__2, &q__8); + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L160: */ + } + +/* Solve L**H * x = b. */ + + for (i__ = *n - 1; i__ >= 1; --i__) { + if (ipiv[i__] == i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + r_cnjg(&q__3, &dl[i__]); + i__4 = i__ + 1 + j * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, + q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] + .r; + q__1.r = b[i__3].r - q__2.r, q__1.i = b[i__3].i - + q__2.i; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + } else { + i__2 = i__ + 1 + j * b_dim1; + temp.r = b[i__2].r, temp.i = b[i__2].i; + i__2 = i__ + 1 + j * b_dim1; + i__3 = i__ + j * b_dim1; + r_cnjg(&q__3, &dl[i__]); + q__2.r = q__3.r * temp.r - q__3.i * temp.i, q__2.i = + q__3.r * temp.i + q__3.i * temp.r; + q__1.r = b[i__3].r - q__2.r, q__1.i = b[i__3].i - + q__2.i; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + i__2 = i__ + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; + } +/* L170: */ + } +/* L180: */ + } + } + } + +/* End of CGTTS2 */ + + return 0; +} /* cgtts2_ */ + diff --git a/lapack-netlib/SRC/chb2st_kernels.c b/lapack-netlib/SRC/chb2st_kernels.c new file mode 100644 index 000000000..e0a057749 --- /dev/null +++ b/lapack-netlib/SRC/chb2st_kernels.c @@ -0,0 +1,801 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHB2ST_KERNELS */ + +/* @generated from zhb2st_kernels.f, fortran z -> c, Wed Dec 7 08:22:40 2016 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHB2ST_KERNELS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHB2ST_KERNELS( UPLO, WANTZ, TTYPE, */ +/* ST, ED, SWEEP, N, NB, IB, */ +/* A, LDA, V, TAU, LDVT, WORK) */ + +/* IMPLICIT NONE */ + +/* CHARACTER UPLO */ +/* LOGICAL WANTZ */ +/* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT */ +/* COMPLEX A( LDA, * ), V( * ), */ +/* TAU( * ), WORK( * ) */ + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHB2ST_KERNELS is an internal routine used by the CHETRD_HB2ST */ +/* > subroutine. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > \endverbatim */ +/* > */ +/* > \param[in] WANTZ */ +/* > \verbatim */ +/* > WANTZ is LOGICAL which indicate if Eigenvalue are requested or both */ +/* > Eigenvalue/Eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TTYPE */ +/* > \verbatim */ +/* > TTYPE is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ST */ +/* > \verbatim */ +/* > ST is INTEGER */ +/* > internal parameter for indices. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ED */ +/* > \verbatim */ +/* > ED is INTEGER */ +/* > internal parameter for indices. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SWEEP */ +/* > \verbatim */ +/* > SWEEP is INTEGER */ +/* > internal parameter for indices. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER. The order of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NB */ +/* > \verbatim */ +/* > NB is INTEGER. The size of the band. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IB */ +/* > \verbatim */ +/* > IB is INTEGER. */ +/* > \endverbatim */ +/* > */ +/* > \param[in, out] A */ +/* > \verbatim */ +/* > A is COMPLEX array. A pointer to the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER. The leading dimension of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] V */ +/* > \verbatim */ +/* > V is COMPLEX array, dimension 2*n if eigenvalues only are */ +/* > requested or to be queried for vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX array, dimension (2*n). */ +/* > The scalar factors of the Householder reflectors are stored */ +/* > in this array. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVT */ +/* > \verbatim */ +/* > LDVT is INTEGER. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array. Workspace of size nb. */ +/* > \endverbatim */ +/* > */ +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Implemented by Azzam Haidar. */ +/* > */ +/* > All details are available on technical report, SC11, SC13 papers. */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int chb2st_kernels_(char *uplo, logical *wantz, integer * + ttype, integer *st, integer *ed, integer *sweep, integer *n, integer * + nb, integer *ib, complex *a, integer *lda, complex *v, complex *tau, + integer *ldvt, complex *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1; + + /* Local variables */ + complex ctmp; + integer dpos, vpos, i__; + extern logical lsame_(char *, char *); + logical upper; + integer j1, j2, lm, ln; + extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, + integer *, complex *); + integer ajeter; + extern /* Subroutine */ int clarfx_(char *, integer *, integer *, complex + *, complex *, complex *, integer *, complex *), clarfy_( + char *, integer *, complex *, integer *, complex *, complex *, + integer *, complex *); + integer ofdpos, taupos; + + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --v; + --tau; + --work; + + /* Function Body */ + ajeter = *ib + *ldvt; + upper = lsame_(uplo, "U"); + if (upper) { + dpos = (*nb << 1) + 1; + ofdpos = *nb << 1; + } else { + dpos = 1; + ofdpos = 2; + } + +/* Upper case */ + + if (upper) { + + if (*wantz) { + vpos = (*sweep - 1) % 2 * *n + *st; + taupos = (*sweep - 1) % 2 * *n + *st; + } else { + vpos = (*sweep - 1) % 2 * *n + *st; + taupos = (*sweep - 1) % 2 * *n + *st; + } + + if (*ttype == 1) { + lm = *ed - *st + 1; + + i__1 = vpos; + v[i__1].r = 1.f, v[i__1].i = 0.f; + i__1 = lm - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = vpos + i__; + r_cnjg(&q__1, &a[ofdpos - i__ + (*st + i__) * a_dim1]); + v[i__2].r = q__1.r, v[i__2].i = q__1.i; + i__2 = ofdpos - i__ + (*st + i__) * a_dim1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L10: */ + } + r_cnjg(&q__1, &a[ofdpos + *st * a_dim1]); + ctmp.r = q__1.r, ctmp.i = q__1.i; + clarfg_(&lm, &ctmp, &v[vpos + 1], &c__1, &tau[taupos]); + i__1 = ofdpos + *st * a_dim1; + a[i__1].r = ctmp.r, a[i__1].i = ctmp.i; + + lm = *ed - *st + 1; + r_cnjg(&q__1, &tau[taupos]); + i__1 = *lda - 1; + clarfy_(uplo, &lm, &v[vpos], &c__1, &q__1, &a[dpos + *st * a_dim1] + , &i__1, &work[1]); + } + + if (*ttype == 3) { + + lm = *ed - *st + 1; + r_cnjg(&q__1, &tau[taupos]); + i__1 = *lda - 1; + clarfy_(uplo, &lm, &v[vpos], &c__1, &q__1, &a[dpos + *st * a_dim1] + , &i__1, &work[1]); + } + + if (*ttype == 2) { + j1 = *ed + 1; +/* Computing MIN */ + i__1 = *ed + *nb; + j2 = f2cmin(i__1,*n); + ln = *ed - *st + 1; + lm = j2 - j1 + 1; + if (lm > 0) { + r_cnjg(&q__1, &tau[taupos]); + i__1 = *lda - 1; + clarfx_("Left", &ln, &lm, &v[vpos], &q__1, &a[dpos - *nb + j1 + * a_dim1], &i__1, &work[1]); + + if (*wantz) { + vpos = (*sweep - 1) % 2 * *n + j1; + taupos = (*sweep - 1) % 2 * *n + j1; + } else { + vpos = (*sweep - 1) % 2 * *n + j1; + taupos = (*sweep - 1) % 2 * *n + j1; + } + + i__1 = vpos; + v[i__1].r = 1.f, v[i__1].i = 0.f; + i__1 = lm - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = vpos + i__; + r_cnjg(&q__1, &a[dpos - *nb - i__ + (j1 + i__) * a_dim1]); + v[i__2].r = q__1.r, v[i__2].i = q__1.i; + i__2 = dpos - *nb - i__ + (j1 + i__) * a_dim1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L30: */ + } + r_cnjg(&q__1, &a[dpos - *nb + j1 * a_dim1]); + ctmp.r = q__1.r, ctmp.i = q__1.i; + clarfg_(&lm, &ctmp, &v[vpos + 1], &c__1, &tau[taupos]); + i__1 = dpos - *nb + j1 * a_dim1; + a[i__1].r = ctmp.r, a[i__1].i = ctmp.i; + + i__1 = ln - 1; + i__2 = *lda - 1; + clarfx_("Right", &i__1, &lm, &v[vpos], &tau[taupos], &a[dpos + - *nb + 1 + j1 * a_dim1], &i__2, &work[1]); + } + } + +/* Lower case */ + + } else { + + if (*wantz) { + vpos = (*sweep - 1) % 2 * *n + *st; + taupos = (*sweep - 1) % 2 * *n + *st; + } else { + vpos = (*sweep - 1) % 2 * *n + *st; + taupos = (*sweep - 1) % 2 * *n + *st; + } + + if (*ttype == 1) { + lm = *ed - *st + 1; + + i__1 = vpos; + v[i__1].r = 1.f, v[i__1].i = 0.f; + i__1 = lm - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = vpos + i__; + i__3 = ofdpos + i__ + (*st - 1) * a_dim1; + v[i__2].r = a[i__3].r, v[i__2].i = a[i__3].i; + i__2 = ofdpos + i__ + (*st - 1) * a_dim1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L20: */ + } + clarfg_(&lm, &a[ofdpos + (*st - 1) * a_dim1], &v[vpos + 1], &c__1, + &tau[taupos]); + + lm = *ed - *st + 1; + + r_cnjg(&q__1, &tau[taupos]); + i__1 = *lda - 1; + clarfy_(uplo, &lm, &v[vpos], &c__1, &q__1, &a[dpos + *st * a_dim1] + , &i__1, &work[1]); + } + + if (*ttype == 3) { + lm = *ed - *st + 1; + + r_cnjg(&q__1, &tau[taupos]); + i__1 = *lda - 1; + clarfy_(uplo, &lm, &v[vpos], &c__1, &q__1, &a[dpos + *st * a_dim1] + , &i__1, &work[1]); + } + + if (*ttype == 2) { + j1 = *ed + 1; +/* Computing MIN */ + i__1 = *ed + *nb; + j2 = f2cmin(i__1,*n); + ln = *ed - *st + 1; + lm = j2 - j1 + 1; + + if (lm > 0) { + i__1 = *lda - 1; + clarfx_("Right", &lm, &ln, &v[vpos], &tau[taupos], &a[dpos + * + nb + *st * a_dim1], &i__1, &work[1]); + + if (*wantz) { + vpos = (*sweep - 1) % 2 * *n + j1; + taupos = (*sweep - 1) % 2 * *n + j1; + } else { + vpos = (*sweep - 1) % 2 * *n + j1; + taupos = (*sweep - 1) % 2 * *n + j1; + } + + i__1 = vpos; + v[i__1].r = 1.f, v[i__1].i = 0.f; + i__1 = lm - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = vpos + i__; + i__3 = dpos + *nb + i__ + *st * a_dim1; + v[i__2].r = a[i__3].r, v[i__2].i = a[i__3].i; + i__2 = dpos + *nb + i__ + *st * a_dim1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L40: */ + } + clarfg_(&lm, &a[dpos + *nb + *st * a_dim1], &v[vpos + 1], & + c__1, &tau[taupos]); + + i__1 = ln - 1; + r_cnjg(&q__1, &tau[taupos]); + i__2 = *lda - 1; + clarfx_("Left", &lm, &i__1, &v[vpos], &q__1, &a[dpos + *nb - + 1 + (*st + 1) * a_dim1], &i__2, &work[1]); + } + } + } + + return 0; + +/* END OF CHB2ST_KERNELS */ + +} /* chb2st_kernels__ */ + diff --git a/lapack-netlib/SRC/chbev.c b/lapack-netlib/SRC/chbev.c new file mode 100644 index 000000000..e27965321 --- /dev/null +++ b/lapack-netlib/SRC/chbev.c @@ -0,0 +1,711 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHBEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER m +atrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHBEV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, */ +/* RWORK, INFO ) */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, KD, LDAB, LDZ, N */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHBEV computes all the eigenvalues and, optionally, eigenvectors of */ +/* > a complex Hermitian band matrix A. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > */ +/* > On exit, AB is overwritten by values generated during the */ +/* > reduction to tridiagonal form. If UPLO = 'U', the first */ +/* > superdiagonal and the diagonal of the tridiagonal matrix T */ +/* > are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */ +/* > the diagonal and first subdiagonal of T are returned in the */ +/* > first two rows of AB. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD + 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX array, dimension (LDZ, N) */ +/* > If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ +/* > eigenvectors of the matrix A, with the i-th column of Z */ +/* > holding the eigenvector associated with W(i). */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (f2cmax(1,3*N-2)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, the algorithm failed to converge; i */ +/* > off-diagonal elements of an intermediate tridiagonal */ +/* > form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexOTHEReigen */ + +/* ===================================================================== */ +/* Subroutine */ int chbev_(char *jobz, char *uplo, integer *n, integer *kd, + complex *ab, integer *ldab, real *w, complex *z__, integer *ldz, + complex *work, real *rwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, z_dim1, z_offset, i__1; + real r__1; + + /* Local variables */ + integer inde; + real anrm; + integer imax; + real rmin, rmax, sigma; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + logical lower, wantz; + extern real clanhb_(char *, char *, integer *, integer *, complex *, + integer *, real *); + integer iscale; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *), chbtrd_(char *, char *, integer *, integer *, complex *, + integer *, real *, real *, complex *, integer *, complex *, + integer *); + extern real slamch_(char *); + real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real bignum; + integer indrwk; + extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, + complex *, integer *, real *, integer *), ssterf_(integer + *, real *, real *, integer *); + real smlnum, eps; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kd < 0) { + *info = -4; + } else if (*ldab < *kd + 1) { + *info = -6; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -9; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHBEV ", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (lower) { + i__1 = ab_dim1 + 1; + w[1] = ab[i__1].r; + } else { + i__1 = *kd + 1 + ab_dim1; + w[1] = ab[i__1].r; + } + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1.f, z__[i__1].i = 0.f; + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = clanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]); + iscale = 0; + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + clascl_("B", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, + info); + } else { + clascl_("Q", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, + info); + } + } + +/* Call CHBTRD to reduce Hermitian band matrix to tridiagonal form. */ + + inde = 1; + chbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &rwork[inde], & + z__[z_offset], ldz, &work[1], &iinfo); + +/* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEQR. */ + + if (! wantz) { + ssterf_(n, &w[1], &rwork[inde], info); + } else { + indrwk = inde + *n; + csteqr_(jobz, n, &w[1], &rwork[inde], &z__[z_offset], ldz, &rwork[ + indrwk], info); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } + + return 0; + +/* End of CHBEV */ + +} /* chbev_ */ + diff --git a/lapack-netlib/SRC/chbev_2stage.c b/lapack-netlib/SRC/chbev_2stage.c new file mode 100644 index 000000000..2a56a677d --- /dev/null +++ b/lapack-netlib/SRC/chbev_2stage.c @@ -0,0 +1,819 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHBEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for +OTHER matrices */ + +/* @generated from zhbev_2stage.f, fortran z -> c, Sat Nov 5 23:18:20 2016 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHBEV_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, */ +/* WORK, LWORK, RWORK, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, KD, LDAB, LDZ, N, LWORK */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHBEV_2STAGE computes all the eigenvalues and, optionally, eigenvectors of */ +/* > a complex Hermitian band matrix A using the 2stage technique for */ +/* > the reduction to tridiagonal. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > Not available in this release. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > */ +/* > On exit, AB is overwritten by values generated during the */ +/* > reduction to tridiagonal form. If UPLO = 'U', the first */ +/* > superdiagonal and the diagonal of the tridiagonal matrix T */ +/* > are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */ +/* > the diagonal and first subdiagonal of T are returned in the */ +/* > first two rows of AB. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD + 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX array, dimension (LDZ, N) */ +/* > If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ +/* > eigenvectors of the matrix A, with the i-th column of Z */ +/* > holding the eigenvector associated with W(i). */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension LWORK */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1, when N <= 1; */ +/* > otherwise */ +/* > If JOBZ = 'N' and N > 1, LWORK must be queried. */ +/* > LWORK = MAX(1, dimension) where */ +/* > dimension = (2KD+1)*N + KD*NTHREADS */ +/* > where KD is the size of the band. */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (f2cmax(1,3*N-2)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, the algorithm failed to converge; i */ +/* > off-diagonal elements of an intermediate tridiagonal */ +/* > form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complexOTHEReigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > All details about the 2stage techniques are available in: */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int chbev_2stage_(char *jobz, char *uplo, integer *n, + integer *kd, complex *ab, integer *ldab, real *w, complex *z__, + integer *ldz, complex *work, integer *lwork, real *rwork, integer * + info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, z_dim1, z_offset, i__1; + real r__1; + + /* Local variables */ + extern /* Subroutine */ int chetrd_hb2st_(char *, char *, char *, + integer *, integer *, complex *, integer *, real *, real *, + complex *, integer *, complex *, integer *, integer *); + integer inde; + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + real anrm; + integer imax; + real rmin, rmax, sigma; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + integer lhtrd, lwmin; + logical lower; + integer lwtrd; + logical wantz; + integer ib; + extern real clanhb_(char *, char *, integer *, integer *, complex *, + integer *, real *); + integer iscale; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *); + extern real slamch_(char *); + real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real bignum; + integer indwrk, indrwk; + extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, + complex *, integer *, real *, integer *), ssterf_(integer + *, real *, real *, integer *); + integer llwork; + real smlnum; + logical lquery; + real eps; + integer indhous; + + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1; + + *info = 0; + if (! lsame_(jobz, "N")) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kd < 0) { + *info = -4; + } else if (*ldab < *kd + 1) { + *info = -6; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -9; + } + + if (*info == 0) { + if (*n <= 1) { + lwmin = 1; + work[1].r = (real) lwmin, work[1].i = 0.f; + } else { + ib = ilaenv2stage_(&c__2, "CHETRD_HB2ST", jobz, n, kd, &c_n1, & + c_n1); + lhtrd = ilaenv2stage_(&c__3, "CHETRD_HB2ST", jobz, n, kd, &ib, & + c_n1); + lwtrd = ilaenv2stage_(&c__4, "CHETRD_HB2ST", jobz, n, kd, &ib, & + c_n1); + lwmin = lhtrd + lwtrd; + work[1].r = (real) lwmin, work[1].i = 0.f; + } + + if (*lwork < lwmin && ! lquery) { + *info = -11; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHBEV_2STAGE ", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (lower) { + i__1 = ab_dim1 + 1; + w[1] = ab[i__1].r; + } else { + i__1 = *kd + 1 + ab_dim1; + w[1] = ab[i__1].r; + } + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1.f, z__[i__1].i = 0.f; + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = clanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]); + iscale = 0; + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + clascl_("B", kd, kd, &c_b21, &sigma, n, n, &ab[ab_offset], ldab, + info); + } else { + clascl_("Q", kd, kd, &c_b21, &sigma, n, n, &ab[ab_offset], ldab, + info); + } + } + +/* Call CHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. */ + + inde = 1; + indhous = 1; + indwrk = indhous + lhtrd; + llwork = *lwork - indwrk + 1; + + chetrd_hb2st_("N", jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], & + rwork[inde], &work[indhous], &lhtrd, &work[indwrk], &llwork, & + iinfo); + +/* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEQR. */ + + if (! wantz) { + ssterf_(n, &w[1], &rwork[inde], info); + } else { + indrwk = inde + *n; + csteqr_(jobz, n, &w[1], &rwork[inde], &z__[z_offset], ldz, &rwork[ + indrwk], info); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } + +/* Set WORK(1) to optimal workspace size. */ + + work[1].r = (real) lwmin, work[1].i = 0.f; + + return 0; + +/* End of CHBEV_2STAGE */ + +} /* chbev_2stage__ */ + diff --git a/lapack-netlib/SRC/chbevd.c b/lapack-netlib/SRC/chbevd.c new file mode 100644 index 000000000..e68a31af8 --- /dev/null +++ b/lapack-netlib/SRC/chbevd.c @@ -0,0 +1,832 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHBEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER +matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHBEVD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, */ +/* LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N */ +/* INTEGER IWORK( * ) */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHBEVD computes all the eigenvalues and, optionally, eigenvectors of */ +/* > a complex Hermitian band matrix A. If eigenvectors are desired, it */ +/* > uses a divide and conquer algorithm. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > */ +/* > On exit, AB is overwritten by values generated during the */ +/* > reduction to tridiagonal form. If UPLO = 'U', the first */ +/* > superdiagonal and the diagonal of the tridiagonal matrix T */ +/* > are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */ +/* > the diagonal and first subdiagonal of T are returned in the */ +/* > first two rows of AB. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD + 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX array, dimension (LDZ, N) */ +/* > If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ +/* > eigenvectors of the matrix A, with the i-th column of Z */ +/* > holding the eigenvector associated with W(i). */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If N <= 1, LWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LWORK must be at least N. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be at least 2*N**2. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, */ +/* > dimension (LRWORK) */ +/* > On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The dimension of array RWORK. */ +/* > If N <= 1, LRWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LRWORK must be at least N. */ +/* > If JOBZ = 'V' and N > 1, LRWORK must be at least */ +/* > 1 + 5*N + 2*N**2. */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of array IWORK. */ +/* > If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. */ +/* > If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, the algorithm failed to converge; i */ +/* > off-diagonal elements of an intermediate tridiagonal */ +/* > form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexOTHEReigen */ + +/* ===================================================================== */ +/* Subroutine */ int chbevd_(char *jobz, char *uplo, integer *n, integer *kd, + complex *ab, integer *ldab, real *w, complex *z__, integer *ldz, + complex *work, integer *lwork, real *rwork, integer *lrwork, integer * + iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, z_dim1, z_offset, i__1; + real r__1; + + /* Local variables */ + integer inde; + real anrm; + integer imax; + real rmin, rmax; + integer llwk2; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + real sigma; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + integer lwmin; + logical lower; + integer llrwk; + logical wantz; + integer indwk2; + extern real clanhb_(char *, char *, integer *, integer *, complex *, + integer *, real *); + integer iscale; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *), cstedc_(char *, integer *, real *, real *, complex *, + integer *, complex *, integer *, real *, integer *, integer *, + integer *, integer *), chbtrd_(char *, char *, integer *, + integer *, complex *, integer *, real *, real *, complex *, + integer *, complex *, integer *); + extern real slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *); + real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real bignum; + integer indwrk, liwmin; + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); + integer lrwmin; + real smlnum; + logical lquery; + real eps; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *liwork == -1 || *lrwork == -1; + + *info = 0; + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + } else { + if (wantz) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = i__1 * i__1 << 1; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { + lwmin = *n; + lrwmin = *n; + liwmin = 1; + } + } + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kd < 0) { + *info = -4; + } else if (*ldab < *kd + 1) { + *info = -6; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -9; + } + + if (*info == 0) { + work[1].r = (real) lwmin, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -11; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -13; + } else if (*liwork < liwmin && ! lquery) { + *info = -15; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHBEVD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = ab_dim1 + 1; + w[1] = ab[i__1].r; + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1.f, z__[i__1].i = 0.f; + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = clanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]); + iscale = 0; + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + clascl_("B", kd, kd, &c_b13, &sigma, n, n, &ab[ab_offset], ldab, + info); + } else { + clascl_("Q", kd, kd, &c_b13, &sigma, n, n, &ab[ab_offset], ldab, + info); + } + } + +/* Call CHBTRD to reduce Hermitian band matrix to tridiagonal form. */ + + inde = 1; + indwrk = inde + *n; + indwk2 = *n * *n + 1; + llwk2 = *lwork - indwk2 + 1; + llrwk = *lrwork - indwrk + 1; + chbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &rwork[inde], & + z__[z_offset], ldz, &work[1], &iinfo); + +/* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEDC. */ + + if (! wantz) { + ssterf_(n, &w[1], &rwork[inde], info); + } else { + cstedc_("I", n, &w[1], &rwork[inde], &work[1], n, &work[indwk2], & + llwk2, &rwork[indwrk], &llrwk, &iwork[1], liwork, info); + cgemm_("N", "N", n, n, n, &c_b2, &z__[z_offset], ldz, &work[1], n, & + c_b1, &work[indwk2], n); + clacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } + + work[1].r = (real) lwmin, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + return 0; + +/* End of CHBEVD */ + +} /* chbevd_ */ + diff --git a/lapack-netlib/SRC/chbevd_2stage.c b/lapack-netlib/SRC/chbevd_2stage.c new file mode 100644 index 000000000..ce6f4a2b9 --- /dev/null +++ b/lapack-netlib/SRC/chbevd_2stage.c @@ -0,0 +1,895 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHBEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for + OTHER matrices */ + +/* @generated from zhbevd_2stage.f, fortran z -> c, Sat Nov 5 23:18:17 2016 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHBEVD_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, */ +/* WORK, LWORK, RWORK, LRWORK, IWORK, */ +/* LIWORK, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N */ +/* INTEGER IWORK( * ) */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHBEVD_2STAGE computes all the eigenvalues and, optionally, eigenvectors of */ +/* > a complex Hermitian band matrix A using the 2stage technique for */ +/* > the reduction to tridiagonal. If eigenvectors are desired, it */ +/* > uses a divide and conquer algorithm. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > Not available in this release. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > */ +/* > On exit, AB is overwritten by values generated during the */ +/* > reduction to tridiagonal form. If UPLO = 'U', the first */ +/* > superdiagonal and the diagonal of the tridiagonal matrix T */ +/* > are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */ +/* > the diagonal and first subdiagonal of T are returned in the */ +/* > first two rows of AB. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD + 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX array, dimension (LDZ, N) */ +/* > If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */ +/* > eigenvectors of the matrix A, with the i-th column of Z */ +/* > holding the eigenvector associated with W(i). */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1, when N <= 1; */ +/* > otherwise */ +/* > If JOBZ = 'N' and N > 1, LWORK must be queried. */ +/* > LWORK = MAX(1, dimension) where */ +/* > dimension = (2KD+1)*N + KD*NTHREADS */ +/* > where KD is the size of the band. */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, */ +/* > dimension (LRWORK) */ +/* > On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The dimension of array RWORK. */ +/* > If N <= 1, LRWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LRWORK must be at least N. */ +/* > If JOBZ = 'V' and N > 1, LRWORK must be at least */ +/* > 1 + 5*N + 2*N**2. */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of array IWORK. */ +/* > If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. */ +/* > If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, the algorithm failed to converge; i */ +/* > off-diagonal elements of an intermediate tridiagonal */ +/* > form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complexOTHEReigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > All details about the 2stage techniques are available in: */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int chbevd_2stage_(char *jobz, char *uplo, integer *n, + integer *kd, complex *ab, integer *ldab, real *w, complex *z__, + integer *ldz, complex *work, integer *lwork, real *rwork, integer * + lrwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, z_dim1, z_offset, i__1, i__2; + real r__1; + + /* Local variables */ + extern /* Subroutine */ int chetrd_hb2st_(char *, char *, char *, + integer *, integer *, complex *, integer *, real *, real *, + complex *, integer *, complex *, integer *, integer *); + integer inde; + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + real anrm; + integer imax; + real rmin, rmax; + integer llwk2; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + real sigma; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + integer indwk, lhtrd, lwmin; + logical lower; + integer lwtrd, llrwk; + logical wantz; + integer indwk2, ib; + extern real clanhb_(char *, char *, integer *, integer *, complex *, + integer *, real *); + integer iscale; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *), cstedc_(char *, integer *, real *, real *, complex *, + integer *, complex *, integer *, real *, integer *, integer *, + integer *, integer *); + extern real slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *); + real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real bignum; + integer indrwk, liwmin; + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); + integer lrwmin, llwork; + real smlnum; + logical lquery; + real eps; + integer indhous; + + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *liwork == -1 || *lrwork == -1; + + *info = 0; + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + } else { + ib = ilaenv2stage_(&c__2, "CHETRD_HB2ST", jobz, n, kd, &c_n1, &c_n1); + lhtrd = ilaenv2stage_(&c__3, "CHETRD_HB2ST", jobz, n, kd, &ib, &c_n1); + lwtrd = ilaenv2stage_(&c__4, "CHETRD_HB2ST", jobz, n, kd, &ib, &c_n1); + if (wantz) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = i__1 * i__1 << 1; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { +/* Computing MAX */ + i__1 = *n, i__2 = lhtrd + lwtrd; + lwmin = f2cmax(i__1,i__2); + lrwmin = *n; + liwmin = 1; + } + } + if (! lsame_(jobz, "N")) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kd < 0) { + *info = -4; + } else if (*ldab < *kd + 1) { + *info = -6; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -9; + } + + if (*info == 0) { + work[1].r = (real) lwmin, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -11; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -13; + } else if (*liwork < liwmin && ! lquery) { + *info = -15; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHBEVD_2STAGE", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = ab_dim1 + 1; + w[1] = ab[i__1].r; + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1.f, z__[i__1].i = 0.f; + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = clanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]); + iscale = 0; + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + clascl_("B", kd, kd, &c_b23, &sigma, n, n, &ab[ab_offset], ldab, + info); + } else { + clascl_("Q", kd, kd, &c_b23, &sigma, n, n, &ab[ab_offset], ldab, + info); + } + } + +/* Call CHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. */ + + inde = 1; + indrwk = inde + *n; + llrwk = *lrwork - indrwk + 1; + indhous = 1; + indwk = indhous + lhtrd; + llwork = *lwork - indwk + 1; + indwk2 = indwk + *n * *n; + llwk2 = *lwork - indwk2 + 1; + + chetrd_hb2st_("N", jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], & + rwork[inde], &work[indhous], &lhtrd, &work[indwk], &llwork, & + iinfo); + +/* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEDC. */ + + if (! wantz) { + ssterf_(n, &w[1], &rwork[inde], info); + } else { + cstedc_("I", n, &w[1], &rwork[inde], &work[1], n, &work[indwk2], & + llwk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info); + cgemm_("N", "N", n, n, n, &c_b2, &z__[z_offset], ldz, &work[1], n, & + c_b1, &work[indwk2], n); + clacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } + + work[1].r = (real) lwmin, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + return 0; + +/* End of CHBEVD_2STAGE */ + +} /* chbevd_2stage__ */ + diff --git a/lapack-netlib/SRC/chbevx.c b/lapack-netlib/SRC/chbevx.c new file mode 100644 index 000000000..003cb6c83 --- /dev/null +++ b/lapack-netlib/SRC/chbevx.c @@ -0,0 +1,1011 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHBEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER +matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHBEVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL, */ +/* VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK, */ +/* IWORK, IFAIL, INFO ) */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N */ +/* REAL ABSTOL, VL, VU */ +/* INTEGER IFAIL( * ), IWORK( * ) */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX AB( LDAB, * ), Q( LDQ, * ), WORK( * ), */ +/* $ Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHBEVX computes selected eigenvalues and, optionally, eigenvectors */ +/* > of a complex Hermitian band matrix A. Eigenvalues and eigenvectors */ +/* > can be selected by specifying either a range of values or a range of */ +/* > indices for the desired eigenvalues. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all eigenvalues will be found; */ +/* > = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* > will be found; */ +/* > = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > */ +/* > On exit, AB is overwritten by values generated during the */ +/* > reduction to tridiagonal form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD + 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX array, dimension (LDQ, N) */ +/* > If JOBZ = 'V', the N-by-N unitary matrix used in the */ +/* > reduction to tridiagonal form. */ +/* > If JOBZ = 'N', the array Q is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. If JOBZ = 'V', then */ +/* > LDQ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is REAL */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is REAL */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > smallest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > largest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ABSTOL */ +/* > \verbatim */ +/* > ABSTOL is REAL */ +/* > The absolute error tolerance for the eigenvalues. */ +/* > An approximate eigenvalue is accepted as converged */ +/* > when it is determined to lie in an interval [a,b] */ +/* > of width less than or equal to */ +/* > */ +/* > ABSTOL + EPS * f2cmax( |a|,|b| ) , */ +/* > */ +/* > where EPS is the machine precision. If ABSTOL is less than */ +/* > or equal to zero, then EPS*|T| will be used in its place, */ +/* > where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* > by reducing AB to tridiagonal form. */ +/* > */ +/* > Eigenvalues will be computed most accurately when ABSTOL is */ +/* > set to twice the underflow threshold 2*SLAMCH('S'), not zero. */ +/* > If this routine returns with INFO>0, indicating that some */ +/* > eigenvectors did not converge, try setting ABSTOL to */ +/* > 2*SLAMCH('S'). */ +/* > */ +/* > See "Computing Small Singular Values of Bidiagonal Matrices */ +/* > with Guaranteed High Relative Accuracy," by Demmel and */ +/* > Kahan, LAPACK Working Note #3. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The total number of eigenvalues found. 0 <= M <= N. */ +/* > If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > The first M elements contain the selected eigenvalues in */ +/* > ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX array, dimension (LDZ, f2cmax(1,M)) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* > contain the orthonormal eigenvectors of the matrix A */ +/* > corresponding to the selected eigenvalues, with the i-th */ +/* > column of Z holding the eigenvector associated with W(i). */ +/* > If an eigenvector fails to converge, then that column of Z */ +/* > contains the latest approximation to the eigenvector, and the */ +/* > index of the eigenvector is returned in IFAIL. */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > Note: the user must ensure that at least f2cmax(1,M) columns are */ +/* > supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* > is not known in advance and an upper bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (7*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (5*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IFAIL */ +/* > \verbatim */ +/* > IFAIL is INTEGER array, dimension (N) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* > IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* > indices of the eigenvectors that failed to converge. */ +/* > If JOBZ = 'N', then IFAIL is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, then i eigenvectors failed to converge. */ +/* > Their indices are stored in array IFAIL. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complexOTHEReigen */ + +/* ===================================================================== */ +/* Subroutine */ int chbevx_(char *jobz, char *range, char *uplo, integer *n, + integer *kd, complex *ab, integer *ldab, complex *q, integer *ldq, + real *vl, real *vu, integer *il, integer *iu, real *abstol, integer * + m, real *w, complex *z__, integer *ldz, complex *work, real *rwork, + integer *iwork, integer *ifail, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, + i__2; + real r__1, r__2; + + /* Local variables */ + integer indd, inde; + real anrm; + integer imax; + real rmin, rmax; + logical test; + complex ctmp1; + integer itmp1, i__, j, indee; + real sigma; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + char order[1]; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *), cswap_(integer *, complex *, integer *, + complex *, integer *); + logical lower; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + logical wantz; + integer jj; + extern real clanhb_(char *, char *, integer *, integer *, complex *, + integer *, real *); + logical alleig, indeig; + integer iscale, indibl; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *), chbtrd_(char *, char *, integer *, integer *, complex *, + integer *, real *, real *, complex *, integer *, complex *, + integer *); + logical valeig; + extern real slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *); + real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real abstll, bignum; + integer indiwk, indisp; + extern /* Subroutine */ int cstein_(integer *, real *, real *, integer *, + real *, integer *, integer *, complex *, integer *, real *, + integer *, integer *, integer *); + integer indrwk, indwrk; + extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, + complex *, integer *, real *, integer *), ssterf_(integer + *, real *, real *, integer *); + integer nsplit; + extern /* Subroutine */ int sstebz_(char *, char *, integer *, real *, + real *, integer *, integer *, real *, real *, real *, integer *, + integer *, real *, integer *, integer *, real *, integer *, + integer *); + real smlnum, eps, vll, vuu, tmp1; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + --ifail; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + alleig = lsame_(range, "A"); + valeig = lsame_(range, "V"); + indeig = lsame_(range, "I"); + lower = lsame_(uplo, "L"); + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*kd < 0) { + *info = -5; + } else if (*ldab < *kd + 1) { + *info = -7; + } else if (wantz && *ldq < f2cmax(1,*n)) { + *info = -9; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -11; + } + } else if (indeig) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -12; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -13; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -18; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHBEVX", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + + if (*n == 1) { + *m = 1; + if (lower) { + i__1 = ab_dim1 + 1; + ctmp1.r = ab[i__1].r, ctmp1.i = ab[i__1].i; + } else { + i__1 = *kd + 1 + ab_dim1; + ctmp1.r = ab[i__1].r, ctmp1.i = ab[i__1].i; + } + tmp1 = ctmp1.r; + if (valeig) { + if (! (*vl < tmp1 && *vu >= tmp1)) { + *m = 0; + } + } + if (*m == 1) { + w[1] = ctmp1.r; + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1.f, z__[i__1].i = 0.f; + } + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + r__1 = sqrt(bignum), r__2 = 1.f / sqrt(sqrt(safmin)); + rmax = f2cmin(r__1,r__2); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + abstll = *abstol; + if (valeig) { + vll = *vl; + vuu = *vu; + } else { + vll = 0.f; + vuu = 0.f; + } + anrm = clanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]); + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + clascl_("B", kd, kd, &c_b16, &sigma, n, n, &ab[ab_offset], ldab, + info); + } else { + clascl_("Q", kd, kd, &c_b16, &sigma, n, n, &ab[ab_offset], ldab, + info); + } + if (*abstol > 0.f) { + abstll = *abstol * sigma; + } + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } + +/* Call CHBTRD to reduce Hermitian band matrix to tridiagonal form. */ + + indd = 1; + inde = indd + *n; + indrwk = inde + *n; + indwrk = 1; + chbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &rwork[indd], &rwork[ + inde], &q[q_offset], ldq, &work[indwrk], &iinfo); + +/* If all eigenvalues are desired and ABSTOL is less than or equal */ +/* to zero, then call SSTERF or CSTEQR. If this fails for some */ +/* eigenvalue, then try SSTEBZ. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && *abstol <= 0.f) { + scopy_(n, &rwork[indd], &c__1, &w[1], &c__1); + indee = indrwk + (*n << 1); + if (! wantz) { + i__1 = *n - 1; + scopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + ssterf_(n, &w[1], &rwork[indee], info); + } else { + clacpy_("A", n, n, &q[q_offset], ldq, &z__[z_offset], ldz); + i__1 = *n - 1; + scopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + csteqr_(jobz, n, &w[1], &rwork[indee], &z__[z_offset], ldz, & + rwork[indrwk], info); + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ifail[i__] = 0; +/* L10: */ + } + } + } + if (*info == 0) { + *m = *n; + goto L30; + } + *info = 0; + } + +/* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + indibl = 1; + indisp = indibl + *n; + indiwk = indisp + *n; + sstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &rwork[indd], & + rwork[inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], & + rwork[indrwk], &iwork[indiwk], info); + + if (wantz) { + cstein_(n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], & + iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[ + indiwk], &ifail[1], info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by CSTEIN. */ + + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + ccopy_(n, &z__[j * z_dim1 + 1], &c__1, &work[1], &c__1); + cgemv_("N", n, n, &c_b2, &q[q_offset], ldq, &work[1], &c__1, & + c_b1, &z__[j * z_dim1 + 1], &c__1); +/* L20: */ + } + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +L30: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L40: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + if (*info != 0) { + itmp1 = ifail[i__]; + ifail[i__] = ifail[j]; + ifail[j] = itmp1; + } + } +/* L50: */ + } + } + + return 0; + +/* End of CHBEVX */ + +} /* chbevx_ */ + diff --git a/lapack-netlib/SRC/chbevx_2stage.c b/lapack-netlib/SRC/chbevx_2stage.c new file mode 100644 index 000000000..d0559bb96 --- /dev/null +++ b/lapack-netlib/SRC/chbevx_2stage.c @@ -0,0 +1,1118 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHBEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for + OTHER matrices */ + +/* @generated from zhbevx_2stage.f, fortran z -> c, Sat Nov 5 23:18:22 2016 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHBEVX_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, */ +/* Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, */ +/* Z, LDZ, WORK, LWORK, RWORK, IWORK, */ +/* IFAIL, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK */ +/* REAL ABSTOL, VL, VU */ +/* INTEGER IFAIL( * ), IWORK( * ) */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX AB( LDAB, * ), Q( LDQ, * ), WORK( * ), */ +/* $ Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHBEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors */ +/* > of a complex Hermitian band matrix A using the 2stage technique for */ +/* > the reduction to tridiagonal. Eigenvalues and eigenvectors */ +/* > can be selected by specifying either a range of values or a range of */ +/* > indices for the desired eigenvalues. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > Not available in this release. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all eigenvalues will be found; */ +/* > = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* > will be found; */ +/* > = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > */ +/* > On exit, AB is overwritten by values generated during the */ +/* > reduction to tridiagonal form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD + 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX array, dimension (LDQ, N) */ +/* > If JOBZ = 'V', the N-by-N unitary matrix used in the */ +/* > reduction to tridiagonal form. */ +/* > If JOBZ = 'N', the array Q is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. If JOBZ = 'V', then */ +/* > LDQ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is REAL */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is REAL */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > smallest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > largest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ABSTOL */ +/* > \verbatim */ +/* > ABSTOL is REAL */ +/* > The absolute error tolerance for the eigenvalues. */ +/* > An approximate eigenvalue is accepted as converged */ +/* > when it is determined to lie in an interval [a,b] */ +/* > of width less than or equal to */ +/* > */ +/* > ABSTOL + EPS * f2cmax( |a|,|b| ) , */ +/* > */ +/* > where EPS is the machine precision. If ABSTOL is less than */ +/* > or equal to zero, then EPS*|T| will be used in its place, */ +/* > where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* > by reducing AB to tridiagonal form. */ +/* > */ +/* > Eigenvalues will be computed most accurately when ABSTOL is */ +/* > set to twice the underflow threshold 2*SLAMCH('S'), not zero. */ +/* > If this routine returns with INFO>0, indicating that some */ +/* > eigenvectors did not converge, try setting ABSTOL to */ +/* > 2*SLAMCH('S'). */ +/* > */ +/* > See "Computing Small Singular Values of Bidiagonal Matrices */ +/* > with Guaranteed High Relative Accuracy," by Demmel and */ +/* > Kahan, LAPACK Working Note #3. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The total number of eigenvalues found. 0 <= M <= N. */ +/* > If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > The first M elements contain the selected eigenvalues in */ +/* > ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX array, dimension (LDZ, f2cmax(1,M)) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* > contain the orthonormal eigenvectors of the matrix A */ +/* > corresponding to the selected eigenvalues, with the i-th */ +/* > column of Z holding the eigenvector associated with W(i). */ +/* > If an eigenvector fails to converge, then that column of Z */ +/* > contains the latest approximation to the eigenvector, and the */ +/* > index of the eigenvector is returned in IFAIL. */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > Note: the user must ensure that at least f2cmax(1,M) columns are */ +/* > supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* > is not known in advance and an upper bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1, when N <= 1; */ +/* > otherwise */ +/* > If JOBZ = 'N' and N > 1, LWORK must be queried. */ +/* > LWORK = MAX(1, dimension) where */ +/* > dimension = (2KD+1)*N + KD*NTHREADS */ +/* > where KD is the size of the band. */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (7*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (5*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IFAIL */ +/* > \verbatim */ +/* > IFAIL is INTEGER array, dimension (N) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* > IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* > indices of the eigenvectors that failed to converge. */ +/* > If JOBZ = 'N', then IFAIL is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, then i eigenvectors failed to converge. */ +/* > Their indices are stored in array IFAIL. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complexOTHEReigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > All details about the 2stage techniques are available in: */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int chbevx_2stage_(char *jobz, char *range, char *uplo, + integer *n, integer *kd, complex *ab, integer *ldab, complex *q, + integer *ldq, real *vl, real *vu, integer *il, integer *iu, real * + abstol, integer *m, real *w, complex *z__, integer *ldz, complex * + work, integer *lwork, real *rwork, integer *iwork, integer *ifail, + integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, + i__2; + real r__1, r__2; + + /* Local variables */ + extern /* Subroutine */ int chetrd_hb2st_(char *, char *, char *, + integer *, integer *, complex *, integer *, real *, real *, + complex *, integer *, complex *, integer *, integer *); + integer indd, inde; + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + real anrm; + integer imax; + real rmin, rmax; + logical test; + complex ctmp1; + integer itmp1, i__, j, indee; + real sigma; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + char order[1]; + integer lhtrd; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *), cswap_(integer *, complex *, integer *, + complex *, integer *); + integer lwmin; + logical lower; + integer lwtrd; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + logical wantz; + integer ib, jj; + extern real clanhb_(char *, char *, integer *, integer *, complex *, + integer *, real *); + logical alleig, indeig; + integer iscale, indibl; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *); + logical valeig; + extern real slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *); + real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real abstll, bignum; + integer indiwk, indisp; + extern /* Subroutine */ int cstein_(integer *, real *, real *, integer *, + real *, integer *, integer *, complex *, integer *, real *, + integer *, integer *, integer *); + integer indrwk, indwrk; + extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, + complex *, integer *, real *, integer *), ssterf_(integer + *, real *, real *, integer *); + integer nsplit, llwork; + real smlnum; + extern /* Subroutine */ int sstebz_(char *, char *, integer *, real *, + real *, integer *, integer *, real *, real *, real *, integer *, + integer *, real *, integer *, integer *, real *, integer *, + integer *); + logical lquery; + real eps, vll, vuu; + integer indhous; + real tmp1; + + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + --ifail; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + alleig = lsame_(range, "A"); + valeig = lsame_(range, "V"); + indeig = lsame_(range, "I"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1; + + *info = 0; + if (! lsame_(jobz, "N")) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*kd < 0) { + *info = -5; + } else if (*ldab < *kd + 1) { + *info = -7; + } else if (wantz && *ldq < f2cmax(1,*n)) { + *info = -9; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -11; + } + } else if (indeig) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -12; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -13; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -18; + } + } + + if (*info == 0) { + if (*n <= 1) { + lwmin = 1; + work[1].r = (real) lwmin, work[1].i = 0.f; + } else { + ib = ilaenv2stage_(&c__2, "CHETRD_HB2ST", jobz, n, kd, &c_n1, & + c_n1); + lhtrd = ilaenv2stage_(&c__3, "CHETRD_HB2ST", jobz, n, kd, &ib, & + c_n1); + lwtrd = ilaenv2stage_(&c__4, "CHETRD_HB2ST", jobz, n, kd, &ib, & + c_n1); + lwmin = lhtrd + lwtrd; + work[1].r = (real) lwmin, work[1].i = 0.f; + } + + if (*lwork < lwmin && ! lquery) { + *info = -20; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHBEVX_2STAGE", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + + if (*n == 1) { + *m = 1; + if (lower) { + i__1 = ab_dim1 + 1; + ctmp1.r = ab[i__1].r, ctmp1.i = ab[i__1].i; + } else { + i__1 = *kd + 1 + ab_dim1; + ctmp1.r = ab[i__1].r, ctmp1.i = ab[i__1].i; + } + tmp1 = ctmp1.r; + if (valeig) { + if (! (*vl < tmp1 && *vu >= tmp1)) { + *m = 0; + } + } + if (*m == 1) { + w[1] = ctmp1.r; + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1.f, z__[i__1].i = 0.f; + } + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + r__1 = sqrt(bignum), r__2 = 1.f / sqrt(sqrt(safmin)); + rmax = f2cmin(r__1,r__2); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + abstll = *abstol; + if (valeig) { + vll = *vl; + vuu = *vu; + } else { + vll = 0.f; + vuu = 0.f; + } + anrm = clanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]); + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + clascl_("B", kd, kd, &c_b26, &sigma, n, n, &ab[ab_offset], ldab, + info); + } else { + clascl_("Q", kd, kd, &c_b26, &sigma, n, n, &ab[ab_offset], ldab, + info); + } + if (*abstol > 0.f) { + abstll = *abstol * sigma; + } + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } + +/* Call CHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. */ + + indd = 1; + inde = indd + *n; + indrwk = inde + *n; + + indhous = 1; + indwrk = indhous + lhtrd; + llwork = *lwork - indwrk + 1; + + chetrd_hb2st_("N", jobz, uplo, n, kd, &ab[ab_offset], ldab, &rwork[indd], + &rwork[inde], &work[indhous], &lhtrd, &work[indwrk], &llwork, & + iinfo); + +/* If all eigenvalues are desired and ABSTOL is less than or equal */ +/* to zero, then call SSTERF or CSTEQR. If this fails for some */ +/* eigenvalue, then try SSTEBZ. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && *abstol <= 0.f) { + scopy_(n, &rwork[indd], &c__1, &w[1], &c__1); + indee = indrwk + (*n << 1); + if (! wantz) { + i__1 = *n - 1; + scopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + ssterf_(n, &w[1], &rwork[indee], info); + } else { + clacpy_("A", n, n, &q[q_offset], ldq, &z__[z_offset], ldz); + i__1 = *n - 1; + scopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + csteqr_(jobz, n, &w[1], &rwork[indee], &z__[z_offset], ldz, & + rwork[indrwk], info); + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ifail[i__] = 0; +/* L10: */ + } + } + } + if (*info == 0) { + *m = *n; + goto L30; + } + *info = 0; + } + +/* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + indibl = 1; + indisp = indibl + *n; + indiwk = indisp + *n; + sstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &rwork[indd], & + rwork[inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], & + rwork[indrwk], &iwork[indiwk], info); + + if (wantz) { + cstein_(n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], & + iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[ + indiwk], &ifail[1], info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by CSTEIN. */ + + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + ccopy_(n, &z__[j * z_dim1 + 1], &c__1, &work[1], &c__1); + cgemv_("N", n, n, &c_b2, &q[q_offset], ldq, &work[1], &c__1, & + c_b1, &z__[j * z_dim1 + 1], &c__1); +/* L20: */ + } + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +L30: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L40: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + if (*info != 0) { + itmp1 = ifail[i__]; + ifail[i__] = ifail[j]; + ifail[j] = itmp1; + } + } +/* L50: */ + } + } + +/* Set WORK(1) to optimal workspace size. */ + + work[1].r = (real) lwmin, work[1].i = 0.f; + + return 0; + +/* End of CHBEVX_2STAGE */ + +} /* chbevx_2stage__ */ + diff --git a/lapack-netlib/SRC/chbgst.c b/lapack-netlib/SRC/chbgst.c new file mode 100644 index 000000000..a69683e5a --- /dev/null +++ b/lapack-netlib/SRC/chbgst.c @@ -0,0 +1,2599 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHBGST */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHBGST + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X, */ +/* LDX, WORK, RWORK, INFO ) */ + +/* CHARACTER UPLO, VECT */ +/* INTEGER INFO, KA, KB, LDAB, LDBB, LDX, N */ +/* REAL RWORK( * ) */ +/* COMPLEX AB( LDAB, * ), BB( LDBB, * ), WORK( * ), */ +/* $ X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHBGST reduces a complex Hermitian-definite banded generalized */ +/* > eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, */ +/* > such that C has the same bandwidth as A. */ +/* > */ +/* > B must have been previously factorized as S**H*S by CPBSTF, using a */ +/* > split Cholesky factorization. A is overwritten by C = X**H*A*X, where */ +/* > X = S**(-1)*Q and Q is a unitary matrix chosen to preserve the */ +/* > bandwidth of A. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] VECT */ +/* > \verbatim */ +/* > VECT is CHARACTER*1 */ +/* > = 'N': do not form the transformation matrix X; */ +/* > = 'V': form X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KA */ +/* > \verbatim */ +/* > KA is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KA >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KB */ +/* > \verbatim */ +/* > KB is INTEGER */ +/* > The number of superdiagonals of the matrix B if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KA >= KB >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX array, dimension (LDAB,N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first ka+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for f2cmax(1,j-ka)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+ka). */ +/* > */ +/* > On exit, the transformed matrix X**H*A*X, stored in the same */ +/* > format as A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KA+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BB */ +/* > \verbatim */ +/* > BB is COMPLEX array, dimension (LDBB,N) */ +/* > The banded factor S from the split Cholesky factorization of */ +/* > B, as returned by CPBSTF, stored in the first kb+1 rows of */ +/* > the array. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDBB */ +/* > \verbatim */ +/* > LDBB is INTEGER */ +/* > The leading dimension of the array BB. LDBB >= KB+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is COMPLEX array, dimension (LDX,N) */ +/* > If VECT = 'V', the n-by-n matrix X. */ +/* > If VECT = 'N', the array X is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. */ +/* > LDX >= f2cmax(1,N) if VECT = 'V'; LDX >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int chbgst_(char *vect, char *uplo, integer *n, integer *ka, + integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, + complex *x, integer *ldx, complex *work, real *rwork, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, bb_dim1, bb_offset, x_dim1, x_offset, i__1, + i__2, i__3, i__4, i__5, i__6, i__7, i__8; + real r__1; + complex q__1, q__2, q__3, q__4, q__5, q__6, q__7, q__8, q__9, q__10; + + /* Local variables */ + integer inca; + extern /* Subroutine */ int crot_(integer *, complex *, integer *, + complex *, integer *, real *, complex *); + integer i__, j, k, l, m; + extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, integer *); + complex t; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgeru_(integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, integer *); + integer i0, i1; + logical upper; + integer i2, j1, j2; + logical wantx; + extern /* Subroutine */ int clar2v_(integer *, complex *, complex *, + complex *, integer *, real *, complex *, integer *); + complex ra; + extern /* Subroutine */ int clacgv_(integer *, complex *, integer *); + integer nr, nx; + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), claset_(char *, integer *, integer *, complex *, complex *, + complex *, integer *), clartg_(complex *, complex *, real + *, complex *, complex *), xerbla_(char *, integer *, ftnlen), + clargv_(integer *, complex *, integer *, complex *, integer *, + real *, integer *); + logical update; + extern /* Subroutine */ int clartv_(integer *, complex *, integer *, + complex *, integer *, real *, complex *, integer *); + integer ka1, kb1; + complex ra1; + integer j1t, j2t; + real bii; + integer kbt, nrt; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + bb_dim1 = *ldbb; + bb_offset = 1 + bb_dim1 * 1; + bb -= bb_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --work; + --rwork; + + /* Function Body */ + wantx = lsame_(vect, "V"); + upper = lsame_(uplo, "U"); + ka1 = *ka + 1; + kb1 = *kb + 1; + *info = 0; + if (! wantx && ! lsame_(vect, "N")) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ka < 0) { + *info = -4; + } else if (*kb < 0 || *kb > *ka) { + *info = -5; + } else if (*ldab < *ka + 1) { + *info = -7; + } else if (*ldbb < *kb + 1) { + *info = -9; + } else if (*ldx < 1 || wantx && *ldx < f2cmax(1,*n)) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHBGST", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + inca = *ldab * ka1; + +/* Initialize X to the unit matrix, if needed */ + + if (wantx) { + claset_("Full", n, n, &c_b1, &c_b2, &x[x_offset], ldx); + } + +/* Set M to the splitting point m. It must be the same value as is */ +/* used in CPBSTF. The chosen value allows the arrays WORK and RWORK */ +/* to be of dimension (N). */ + + m = (*n + *kb) / 2; + +/* The routine works in two phases, corresponding to the two halves */ +/* of the split Cholesky factorization of B as S**H*S where */ + +/* S = ( U ) */ +/* ( M L ) */ + +/* with U upper triangular of order m, and L lower triangular of */ +/* order n-m. S has the same bandwidth as B. */ + +/* S is treated as a product of elementary matrices: */ + +/* S = S(m)*S(m-1)*...*S(2)*S(1)*S(m+1)*S(m+2)*...*S(n-1)*S(n) */ + +/* where S(i) is determined by the i-th row of S. */ + +/* In phase 1, the index i takes the values n, n-1, ... , m+1; */ +/* in phase 2, it takes the values 1, 2, ... , m. */ + +/* For each value of i, the current matrix A is updated by forming */ +/* inv(S(i))**H*A*inv(S(i)). This creates a triangular bulge outside */ +/* the band of A. The bulge is then pushed down toward the bottom of */ +/* A in phase 1, and up toward the top of A in phase 2, by applying */ +/* plane rotations. */ + +/* There are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1 */ +/* of them are linearly independent, so annihilating a bulge requires */ +/* only 2*kb-1 plane rotations. The rotations are divided into a 1st */ +/* set of kb-1 rotations, and a 2nd set of kb rotations. */ + +/* Wherever possible, rotations are generated and applied in vector */ +/* operations of length NR between the indices J1 and J2 (sometimes */ +/* replaced by modified values NRT, J1T or J2T). */ + +/* The real cosines and complex sines of the rotations are stored in */ +/* the arrays RWORK and WORK, those of the 1st set in elements */ +/* 2:m-kb-1, and those of the 2nd set in elements m-kb+1:n. */ + +/* The bulges are not formed explicitly; nonzero elements outside the */ +/* band are created only when they are required for generating new */ +/* rotations; they are stored in the array WORK, in positions where */ +/* they are later overwritten by the sines of the rotations which */ +/* annihilate them. */ + +/* **************************** Phase 1 ***************************** */ + +/* The logical structure of this phase is: */ + +/* UPDATE = .TRUE. */ +/* DO I = N, M + 1, -1 */ +/* use S(i) to update A and create a new bulge */ +/* apply rotations to push all bulges KA positions downward */ +/* END DO */ +/* UPDATE = .FALSE. */ +/* DO I = M + KA + 1, N - 1 */ +/* apply rotations to push all bulges KA positions downward */ +/* END DO */ + +/* To avoid duplicating code, the two loops are merged. */ + + update = TRUE_; + i__ = *n + 1; +L10: + if (update) { + --i__; +/* Computing MIN */ + i__1 = *kb, i__2 = i__ - 1; + kbt = f2cmin(i__1,i__2); + i0 = i__ - 1; +/* Computing MIN */ + i__1 = *n, i__2 = i__ + *ka; + i1 = f2cmin(i__1,i__2); + i2 = i__ - kbt + ka1; + if (i__ < m + 1) { + update = FALSE_; + ++i__; + i0 = m; + if (*ka == 0) { + goto L480; + } + goto L10; + } + } else { + i__ += *ka; + if (i__ > *n - 1) { + goto L480; + } + } + + if (upper) { + +/* Transform A, working with the upper triangle */ + + if (update) { + +/* Form inv(S(i))**H * A * inv(S(i)) */ + + i__1 = kb1 + i__ * bb_dim1; + bii = bb[i__1].r; + i__1 = ka1 + i__ * ab_dim1; + i__2 = ka1 + i__ * ab_dim1; + r__1 = ab[i__2].r / bii / bii; + ab[i__1].r = r__1, ab[i__1].i = 0.f; + i__1 = i1; + for (j = i__ + 1; j <= i__1; ++j) { + i__2 = i__ - j + ka1 + j * ab_dim1; + i__3 = i__ - j + ka1 + j * ab_dim1; + q__1.r = ab[i__3].r / bii, q__1.i = ab[i__3].i / bii; + ab[i__2].r = q__1.r, ab[i__2].i = q__1.i; +/* L20: */ + } +/* Computing MAX */ + i__1 = 1, i__2 = i__ - *ka; + i__3 = i__ - 1; + for (j = f2cmax(i__1,i__2); j <= i__3; ++j) { + i__1 = j - i__ + ka1 + i__ * ab_dim1; + i__2 = j - i__ + ka1 + i__ * ab_dim1; + q__1.r = ab[i__2].r / bii, q__1.i = ab[i__2].i / bii; + ab[i__1].r = q__1.r, ab[i__1].i = q__1.i; +/* L30: */ + } + i__3 = i__ - 1; + for (k = i__ - kbt; k <= i__3; ++k) { + i__1 = k; + for (j = i__ - kbt; j <= i__1; ++j) { + i__2 = j - k + ka1 + k * ab_dim1; + i__4 = j - k + ka1 + k * ab_dim1; + i__5 = j - i__ + kb1 + i__ * bb_dim1; + r_cnjg(&q__5, &ab[k - i__ + ka1 + i__ * ab_dim1]); + q__4.r = bb[i__5].r * q__5.r - bb[i__5].i * q__5.i, + q__4.i = bb[i__5].r * q__5.i + bb[i__5].i * + q__5.r; + q__3.r = ab[i__4].r - q__4.r, q__3.i = ab[i__4].i - + q__4.i; + r_cnjg(&q__7, &bb[k - i__ + kb1 + i__ * bb_dim1]); + i__6 = j - i__ + ka1 + i__ * ab_dim1; + q__6.r = q__7.r * ab[i__6].r - q__7.i * ab[i__6].i, + q__6.i = q__7.r * ab[i__6].i + q__7.i * ab[i__6] + .r; + q__2.r = q__3.r - q__6.r, q__2.i = q__3.i - q__6.i; + i__7 = ka1 + i__ * ab_dim1; + r__1 = ab[i__7].r; + i__8 = j - i__ + kb1 + i__ * bb_dim1; + q__9.r = r__1 * bb[i__8].r, q__9.i = r__1 * bb[i__8].i; + r_cnjg(&q__10, &bb[k - i__ + kb1 + i__ * bb_dim1]); + q__8.r = q__9.r * q__10.r - q__9.i * q__10.i, q__8.i = + q__9.r * q__10.i + q__9.i * q__10.r; + q__1.r = q__2.r + q__8.r, q__1.i = q__2.i + q__8.i; + ab[i__2].r = q__1.r, ab[i__2].i = q__1.i; +/* L40: */ + } +/* Computing MAX */ + i__1 = 1, i__2 = i__ - *ka; + i__4 = i__ - kbt - 1; + for (j = f2cmax(i__1,i__2); j <= i__4; ++j) { + i__1 = j - k + ka1 + k * ab_dim1; + i__2 = j - k + ka1 + k * ab_dim1; + r_cnjg(&q__3, &bb[k - i__ + kb1 + i__ * bb_dim1]); + i__5 = j - i__ + ka1 + i__ * ab_dim1; + q__2.r = q__3.r * ab[i__5].r - q__3.i * ab[i__5].i, + q__2.i = q__3.r * ab[i__5].i + q__3.i * ab[i__5] + .r; + q__1.r = ab[i__2].r - q__2.r, q__1.i = ab[i__2].i - + q__2.i; + ab[i__1].r = q__1.r, ab[i__1].i = q__1.i; +/* L50: */ + } +/* L60: */ + } + i__3 = i1; + for (j = i__; j <= i__3; ++j) { +/* Computing MAX */ + i__4 = j - *ka, i__1 = i__ - kbt; + i__2 = i__ - 1; + for (k = f2cmax(i__4,i__1); k <= i__2; ++k) { + i__4 = k - j + ka1 + j * ab_dim1; + i__1 = k - j + ka1 + j * ab_dim1; + i__5 = k - i__ + kb1 + i__ * bb_dim1; + i__6 = i__ - j + ka1 + j * ab_dim1; + q__2.r = bb[i__5].r * ab[i__6].r - bb[i__5].i * ab[i__6] + .i, q__2.i = bb[i__5].r * ab[i__6].i + bb[i__5].i + * ab[i__6].r; + q__1.r = ab[i__1].r - q__2.r, q__1.i = ab[i__1].i - + q__2.i; + ab[i__4].r = q__1.r, ab[i__4].i = q__1.i; +/* L70: */ + } +/* L80: */ + } + + if (wantx) { + +/* post-multiply X by inv(S(i)) */ + + i__3 = *n - m; + r__1 = 1.f / bii; + csscal_(&i__3, &r__1, &x[m + 1 + i__ * x_dim1], &c__1); + if (kbt > 0) { + i__3 = *n - m; + q__1.r = -1.f, q__1.i = 0.f; + cgerc_(&i__3, &kbt, &q__1, &x[m + 1 + i__ * x_dim1], & + c__1, &bb[kb1 - kbt + i__ * bb_dim1], &c__1, &x[m + + 1 + (i__ - kbt) * x_dim1], ldx); + } + } + +/* store a(i,i1) in RA1 for use in next loop over K */ + + i__3 = i__ - i1 + ka1 + i1 * ab_dim1; + ra1.r = ab[i__3].r, ra1.i = ab[i__3].i; + } + +/* Generate and apply vectors of rotations to chase all the */ +/* existing bulges KA positions down toward the bottom of the */ +/* band */ + + i__3 = *kb - 1; + for (k = 1; k <= i__3; ++k) { + if (update) { + +/* Determine the rotations which would annihilate the bulge */ +/* which has in theory just been created */ + + if (i__ - k + *ka < *n && i__ - k > 1) { + +/* generate rotation to annihilate a(i,i-k+ka+1) */ + + clartg_(&ab[k + 1 + (i__ - k + *ka) * ab_dim1], &ra1, & + rwork[i__ - k + *ka - m], &work[i__ - k + *ka - m] + , &ra); + +/* create nonzero element a(i-k,i-k+ka+1) outside the */ +/* band and store it in WORK(i-k) */ + + i__2 = kb1 - k + i__ * bb_dim1; + q__2.r = -bb[i__2].r, q__2.i = -bb[i__2].i; + q__1.r = q__2.r * ra1.r - q__2.i * ra1.i, q__1.i = q__2.r + * ra1.i + q__2.i * ra1.r; + t.r = q__1.r, t.i = q__1.i; + i__2 = i__ - k; + i__4 = i__ - k + *ka - m; + q__2.r = rwork[i__4] * t.r, q__2.i = rwork[i__4] * t.i; + r_cnjg(&q__4, &work[i__ - k + *ka - m]); + i__1 = (i__ - k + *ka) * ab_dim1 + 1; + q__3.r = q__4.r * ab[i__1].r - q__4.i * ab[i__1].i, + q__3.i = q__4.r * ab[i__1].i + q__4.i * ab[i__1] + .r; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; + work[i__2].r = q__1.r, work[i__2].i = q__1.i; + i__2 = (i__ - k + *ka) * ab_dim1 + 1; + i__4 = i__ - k + *ka - m; + q__2.r = work[i__4].r * t.r - work[i__4].i * t.i, q__2.i = + work[i__4].r * t.i + work[i__4].i * t.r; + i__1 = i__ - k + *ka - m; + i__5 = (i__ - k + *ka) * ab_dim1 + 1; + q__3.r = rwork[i__1] * ab[i__5].r, q__3.i = rwork[i__1] * + ab[i__5].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + ab[i__2].r = q__1.r, ab[i__2].i = q__1.i; + ra1.r = ra.r, ra1.i = ra.i; + } + } +/* Computing MAX */ + i__2 = 1, i__4 = k - i0 + 2; + j2 = i__ - k - 1 + f2cmax(i__2,i__4) * ka1; + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + if (update) { +/* Computing MAX */ + i__2 = j2, i__4 = i__ + (*ka << 1) - k + 1; + j2t = f2cmax(i__2,i__4); + } else { + j2t = j2; + } + nrt = (*n - j2t + *ka) / ka1; + i__2 = j1; + i__4 = ka1; + for (j = j2t; i__4 < 0 ? j >= i__2 : j <= i__2; j += i__4) { + +/* create nonzero element a(j-ka,j+1) outside the band */ +/* and store it in WORK(j-m) */ + + i__1 = j - m; + i__5 = j - m; + i__6 = (j + 1) * ab_dim1 + 1; + q__1.r = work[i__5].r * ab[i__6].r - work[i__5].i * ab[i__6] + .i, q__1.i = work[i__5].r * ab[i__6].i + work[i__5].i + * ab[i__6].r; + work[i__1].r = q__1.r, work[i__1].i = q__1.i; + i__1 = (j + 1) * ab_dim1 + 1; + i__5 = j - m; + i__6 = (j + 1) * ab_dim1 + 1; + q__1.r = rwork[i__5] * ab[i__6].r, q__1.i = rwork[i__5] * ab[ + i__6].i; + ab[i__1].r = q__1.r, ab[i__1].i = q__1.i; +/* L90: */ + } + +/* generate rotations in 1st set to annihilate elements which */ +/* have been created outside the band */ + + if (nrt > 0) { + clargv_(&nrt, &ab[j2t * ab_dim1 + 1], &inca, &work[j2t - m], & + ka1, &rwork[j2t - m], &ka1); + } + if (nr > 0) { + +/* apply rotations in 1st set from the right */ + + i__4 = *ka - 1; + for (l = 1; l <= i__4; ++l) { + clartv_(&nr, &ab[ka1 - l + j2 * ab_dim1], &inca, &ab[*ka + - l + (j2 + 1) * ab_dim1], &inca, &rwork[j2 - m], + &work[j2 - m], &ka1); +/* L100: */ + } + +/* apply rotations in 1st set from both sides to diagonal */ +/* blocks */ + + clar2v_(&nr, &ab[ka1 + j2 * ab_dim1], &ab[ka1 + (j2 + 1) * + ab_dim1], &ab[*ka + (j2 + 1) * ab_dim1], &inca, & + rwork[j2 - m], &work[j2 - m], &ka1); + + clacgv_(&nr, &work[j2 - m], &ka1); + } + +/* start applying rotations in 1st set from the left */ + + i__4 = *kb - k + 1; + for (l = *ka - 1; l >= i__4; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + clartv_(&nrt, &ab[l + (j2 + ka1 - l) * ab_dim1], &inca, & + ab[l + 1 + (j2 + ka1 - l) * ab_dim1], &inca, & + rwork[j2 - m], &work[j2 - m], &ka1); + } +/* L110: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 1st set */ + + i__4 = j1; + i__2 = ka1; + for (j = j2; i__2 < 0 ? j >= i__4 : j <= i__4; j += i__2) { + i__1 = *n - m; + r_cnjg(&q__1, &work[j - m]); + crot_(&i__1, &x[m + 1 + j * x_dim1], &c__1, &x[m + 1 + (j + + 1) * x_dim1], &c__1, &rwork[j - m], &q__1); +/* L120: */ + } + } +/* L130: */ + } + + if (update) { + if (i2 <= *n && kbt > 0) { + +/* create nonzero element a(i-kbt,i-kbt+ka+1) outside the */ +/* band and store it in WORK(i-kbt) */ + + i__3 = i__ - kbt; + i__2 = kb1 - kbt + i__ * bb_dim1; + q__2.r = -bb[i__2].r, q__2.i = -bb[i__2].i; + q__1.r = q__2.r * ra1.r - q__2.i * ra1.i, q__1.i = q__2.r * + ra1.i + q__2.i * ra1.r; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; + } + } + + for (k = *kb; k >= 1; --k) { + if (update) { +/* Computing MAX */ + i__3 = 2, i__2 = k - i0 + 1; + j2 = i__ - k - 1 + f2cmax(i__3,i__2) * ka1; + } else { +/* Computing MAX */ + i__3 = 1, i__2 = k - i0 + 1; + j2 = i__ - k - 1 + f2cmax(i__3,i__2) * ka1; + } + +/* finish applying rotations in 2nd set from the left */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (*n - j2 + *ka + l) / ka1; + if (nrt > 0) { + clartv_(&nrt, &ab[l + (j2 - l + 1) * ab_dim1], &inca, &ab[ + l + 1 + (j2 - l + 1) * ab_dim1], &inca, &rwork[j2 + - *ka], &work[j2 - *ka], &ka1); + } +/* L140: */ + } + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + i__3 = j2; + i__2 = -ka1; + for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j += i__2) { + i__4 = j; + i__1 = j - *ka; + work[i__4].r = work[i__1].r, work[i__4].i = work[i__1].i; + rwork[j] = rwork[j - *ka]; +/* L150: */ + } + i__2 = j1; + i__3 = ka1; + for (j = j2; i__3 < 0 ? j >= i__2 : j <= i__2; j += i__3) { + +/* create nonzero element a(j-ka,j+1) outside the band */ +/* and store it in WORK(j) */ + + i__4 = j; + i__1 = j; + i__5 = (j + 1) * ab_dim1 + 1; + q__1.r = work[i__1].r * ab[i__5].r - work[i__1].i * ab[i__5] + .i, q__1.i = work[i__1].r * ab[i__5].i + work[i__1].i + * ab[i__5].r; + work[i__4].r = q__1.r, work[i__4].i = q__1.i; + i__4 = (j + 1) * ab_dim1 + 1; + i__1 = j; + i__5 = (j + 1) * ab_dim1 + 1; + q__1.r = rwork[i__1] * ab[i__5].r, q__1.i = rwork[i__1] * ab[ + i__5].i; + ab[i__4].r = q__1.r, ab[i__4].i = q__1.i; +/* L160: */ + } + if (update) { + if (i__ - k < *n - *ka && k <= kbt) { + i__3 = i__ - k + *ka; + i__2 = i__ - k; + work[i__3].r = work[i__2].r, work[i__3].i = work[i__2].i; + } + } +/* L170: */ + } + + for (k = *kb; k >= 1; --k) { +/* Computing MAX */ + i__3 = 1, i__2 = k - i0 + 1; + j2 = i__ - k - 1 + f2cmax(i__3,i__2) * ka1; + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + if (nr > 0) { + +/* generate rotations in 2nd set to annihilate elements */ +/* which have been created outside the band */ + + clargv_(&nr, &ab[j2 * ab_dim1 + 1], &inca, &work[j2], &ka1, & + rwork[j2], &ka1); + +/* apply rotations in 2nd set from the right */ + + i__3 = *ka - 1; + for (l = 1; l <= i__3; ++l) { + clartv_(&nr, &ab[ka1 - l + j2 * ab_dim1], &inca, &ab[*ka + - l + (j2 + 1) * ab_dim1], &inca, &rwork[j2], & + work[j2], &ka1); +/* L180: */ + } + +/* apply rotations in 2nd set from both sides to diagonal */ +/* blocks */ + + clar2v_(&nr, &ab[ka1 + j2 * ab_dim1], &ab[ka1 + (j2 + 1) * + ab_dim1], &ab[*ka + (j2 + 1) * ab_dim1], &inca, & + rwork[j2], &work[j2], &ka1); + + clacgv_(&nr, &work[j2], &ka1); + } + +/* start applying rotations in 2nd set from the left */ + + i__3 = *kb - k + 1; + for (l = *ka - 1; l >= i__3; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + clartv_(&nrt, &ab[l + (j2 + ka1 - l) * ab_dim1], &inca, & + ab[l + 1 + (j2 + ka1 - l) * ab_dim1], &inca, & + rwork[j2], &work[j2], &ka1); + } +/* L190: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 2nd set */ + + i__3 = j1; + i__2 = ka1; + for (j = j2; i__2 < 0 ? j >= i__3 : j <= i__3; j += i__2) { + i__4 = *n - m; + r_cnjg(&q__1, &work[j]); + crot_(&i__4, &x[m + 1 + j * x_dim1], &c__1, &x[m + 1 + (j + + 1) * x_dim1], &c__1, &rwork[j], &q__1); +/* L200: */ + } + } +/* L210: */ + } + + i__2 = *kb - 1; + for (k = 1; k <= i__2; ++k) { +/* Computing MAX */ + i__3 = 1, i__4 = k - i0 + 2; + j2 = i__ - k - 1 + f2cmax(i__3,i__4) * ka1; + +/* finish applying rotations in 1st set from the left */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + clartv_(&nrt, &ab[l + (j2 + ka1 - l) * ab_dim1], &inca, & + ab[l + 1 + (j2 + ka1 - l) * ab_dim1], &inca, & + rwork[j2 - m], &work[j2 - m], &ka1); + } +/* L220: */ + } +/* L230: */ + } + + if (*kb > 1) { + i__2 = j2 + *ka; + for (j = *n - 1; j >= i__2; --j) { + rwork[j - m] = rwork[j - *ka - m]; + i__3 = j - m; + i__4 = j - *ka - m; + work[i__3].r = work[i__4].r, work[i__3].i = work[i__4].i; +/* L240: */ + } + } + + } else { + +/* Transform A, working with the lower triangle */ + + if (update) { + +/* Form inv(S(i))**H * A * inv(S(i)) */ + + i__2 = i__ * bb_dim1 + 1; + bii = bb[i__2].r; + i__2 = i__ * ab_dim1 + 1; + i__3 = i__ * ab_dim1 + 1; + r__1 = ab[i__3].r / bii / bii; + ab[i__2].r = r__1, ab[i__2].i = 0.f; + i__2 = i1; + for (j = i__ + 1; j <= i__2; ++j) { + i__3 = j - i__ + 1 + i__ * ab_dim1; + i__4 = j - i__ + 1 + i__ * ab_dim1; + q__1.r = ab[i__4].r / bii, q__1.i = ab[i__4].i / bii; + ab[i__3].r = q__1.r, ab[i__3].i = q__1.i; +/* L250: */ + } +/* Computing MAX */ + i__2 = 1, i__3 = i__ - *ka; + i__4 = i__ - 1; + for (j = f2cmax(i__2,i__3); j <= i__4; ++j) { + i__2 = i__ - j + 1 + j * ab_dim1; + i__3 = i__ - j + 1 + j * ab_dim1; + q__1.r = ab[i__3].r / bii, q__1.i = ab[i__3].i / bii; + ab[i__2].r = q__1.r, ab[i__2].i = q__1.i; +/* L260: */ + } + i__4 = i__ - 1; + for (k = i__ - kbt; k <= i__4; ++k) { + i__2 = k; + for (j = i__ - kbt; j <= i__2; ++j) { + i__3 = k - j + 1 + j * ab_dim1; + i__1 = k - j + 1 + j * ab_dim1; + i__5 = i__ - j + 1 + j * bb_dim1; + r_cnjg(&q__5, &ab[i__ - k + 1 + k * ab_dim1]); + q__4.r = bb[i__5].r * q__5.r - bb[i__5].i * q__5.i, + q__4.i = bb[i__5].r * q__5.i + bb[i__5].i * + q__5.r; + q__3.r = ab[i__1].r - q__4.r, q__3.i = ab[i__1].i - + q__4.i; + r_cnjg(&q__7, &bb[i__ - k + 1 + k * bb_dim1]); + i__6 = i__ - j + 1 + j * ab_dim1; + q__6.r = q__7.r * ab[i__6].r - q__7.i * ab[i__6].i, + q__6.i = q__7.r * ab[i__6].i + q__7.i * ab[i__6] + .r; + q__2.r = q__3.r - q__6.r, q__2.i = q__3.i - q__6.i; + i__7 = i__ * ab_dim1 + 1; + r__1 = ab[i__7].r; + i__8 = i__ - j + 1 + j * bb_dim1; + q__9.r = r__1 * bb[i__8].r, q__9.i = r__1 * bb[i__8].i; + r_cnjg(&q__10, &bb[i__ - k + 1 + k * bb_dim1]); + q__8.r = q__9.r * q__10.r - q__9.i * q__10.i, q__8.i = + q__9.r * q__10.i + q__9.i * q__10.r; + q__1.r = q__2.r + q__8.r, q__1.i = q__2.i + q__8.i; + ab[i__3].r = q__1.r, ab[i__3].i = q__1.i; +/* L270: */ + } +/* Computing MAX */ + i__2 = 1, i__3 = i__ - *ka; + i__1 = i__ - kbt - 1; + for (j = f2cmax(i__2,i__3); j <= i__1; ++j) { + i__2 = k - j + 1 + j * ab_dim1; + i__3 = k - j + 1 + j * ab_dim1; + r_cnjg(&q__3, &bb[i__ - k + 1 + k * bb_dim1]); + i__5 = i__ - j + 1 + j * ab_dim1; + q__2.r = q__3.r * ab[i__5].r - q__3.i * ab[i__5].i, + q__2.i = q__3.r * ab[i__5].i + q__3.i * ab[i__5] + .r; + q__1.r = ab[i__3].r - q__2.r, q__1.i = ab[i__3].i - + q__2.i; + ab[i__2].r = q__1.r, ab[i__2].i = q__1.i; +/* L280: */ + } +/* L290: */ + } + i__4 = i1; + for (j = i__; j <= i__4; ++j) { +/* Computing MAX */ + i__1 = j - *ka, i__2 = i__ - kbt; + i__3 = i__ - 1; + for (k = f2cmax(i__1,i__2); k <= i__3; ++k) { + i__1 = j - k + 1 + k * ab_dim1; + i__2 = j - k + 1 + k * ab_dim1; + i__5 = i__ - k + 1 + k * bb_dim1; + i__6 = j - i__ + 1 + i__ * ab_dim1; + q__2.r = bb[i__5].r * ab[i__6].r - bb[i__5].i * ab[i__6] + .i, q__2.i = bb[i__5].r * ab[i__6].i + bb[i__5].i + * ab[i__6].r; + q__1.r = ab[i__2].r - q__2.r, q__1.i = ab[i__2].i - + q__2.i; + ab[i__1].r = q__1.r, ab[i__1].i = q__1.i; +/* L300: */ + } +/* L310: */ + } + + if (wantx) { + +/* post-multiply X by inv(S(i)) */ + + i__4 = *n - m; + r__1 = 1.f / bii; + csscal_(&i__4, &r__1, &x[m + 1 + i__ * x_dim1], &c__1); + if (kbt > 0) { + i__4 = *n - m; + q__1.r = -1.f, q__1.i = 0.f; + i__3 = *ldbb - 1; + cgeru_(&i__4, &kbt, &q__1, &x[m + 1 + i__ * x_dim1], & + c__1, &bb[kbt + 1 + (i__ - kbt) * bb_dim1], &i__3, + &x[m + 1 + (i__ - kbt) * x_dim1], ldx); + } + } + +/* store a(i1,i) in RA1 for use in next loop over K */ + + i__4 = i1 - i__ + 1 + i__ * ab_dim1; + ra1.r = ab[i__4].r, ra1.i = ab[i__4].i; + } + +/* Generate and apply vectors of rotations to chase all the */ +/* existing bulges KA positions down toward the bottom of the */ +/* band */ + + i__4 = *kb - 1; + for (k = 1; k <= i__4; ++k) { + if (update) { + +/* Determine the rotations which would annihilate the bulge */ +/* which has in theory just been created */ + + if (i__ - k + *ka < *n && i__ - k > 1) { + +/* generate rotation to annihilate a(i-k+ka+1,i) */ + + clartg_(&ab[ka1 - k + i__ * ab_dim1], &ra1, &rwork[i__ - + k + *ka - m], &work[i__ - k + *ka - m], &ra); + +/* create nonzero element a(i-k+ka+1,i-k) outside the */ +/* band and store it in WORK(i-k) */ + + i__3 = k + 1 + (i__ - k) * bb_dim1; + q__2.r = -bb[i__3].r, q__2.i = -bb[i__3].i; + q__1.r = q__2.r * ra1.r - q__2.i * ra1.i, q__1.i = q__2.r + * ra1.i + q__2.i * ra1.r; + t.r = q__1.r, t.i = q__1.i; + i__3 = i__ - k; + i__1 = i__ - k + *ka - m; + q__2.r = rwork[i__1] * t.r, q__2.i = rwork[i__1] * t.i; + r_cnjg(&q__4, &work[i__ - k + *ka - m]); + i__2 = ka1 + (i__ - k) * ab_dim1; + q__3.r = q__4.r * ab[i__2].r - q__4.i * ab[i__2].i, + q__3.i = q__4.r * ab[i__2].i + q__4.i * ab[i__2] + .r; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; + i__3 = ka1 + (i__ - k) * ab_dim1; + i__1 = i__ - k + *ka - m; + q__2.r = work[i__1].r * t.r - work[i__1].i * t.i, q__2.i = + work[i__1].r * t.i + work[i__1].i * t.r; + i__2 = i__ - k + *ka - m; + i__5 = ka1 + (i__ - k) * ab_dim1; + q__3.r = rwork[i__2] * ab[i__5].r, q__3.i = rwork[i__2] * + ab[i__5].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + ab[i__3].r = q__1.r, ab[i__3].i = q__1.i; + ra1.r = ra.r, ra1.i = ra.i; + } + } +/* Computing MAX */ + i__3 = 1, i__1 = k - i0 + 2; + j2 = i__ - k - 1 + f2cmax(i__3,i__1) * ka1; + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + if (update) { +/* Computing MAX */ + i__3 = j2, i__1 = i__ + (*ka << 1) - k + 1; + j2t = f2cmax(i__3,i__1); + } else { + j2t = j2; + } + nrt = (*n - j2t + *ka) / ka1; + i__3 = j1; + i__1 = ka1; + for (j = j2t; i__1 < 0 ? j >= i__3 : j <= i__3; j += i__1) { + +/* create nonzero element a(j+1,j-ka) outside the band */ +/* and store it in WORK(j-m) */ + + i__2 = j - m; + i__5 = j - m; + i__6 = ka1 + (j - *ka + 1) * ab_dim1; + q__1.r = work[i__5].r * ab[i__6].r - work[i__5].i * ab[i__6] + .i, q__1.i = work[i__5].r * ab[i__6].i + work[i__5].i + * ab[i__6].r; + work[i__2].r = q__1.r, work[i__2].i = q__1.i; + i__2 = ka1 + (j - *ka + 1) * ab_dim1; + i__5 = j - m; + i__6 = ka1 + (j - *ka + 1) * ab_dim1; + q__1.r = rwork[i__5] * ab[i__6].r, q__1.i = rwork[i__5] * ab[ + i__6].i; + ab[i__2].r = q__1.r, ab[i__2].i = q__1.i; +/* L320: */ + } + +/* generate rotations in 1st set to annihilate elements which */ +/* have been created outside the band */ + + if (nrt > 0) { + clargv_(&nrt, &ab[ka1 + (j2t - *ka) * ab_dim1], &inca, &work[ + j2t - m], &ka1, &rwork[j2t - m], &ka1); + } + if (nr > 0) { + +/* apply rotations in 1st set from the left */ + + i__1 = *ka - 1; + for (l = 1; l <= i__1; ++l) { + clartv_(&nr, &ab[l + 1 + (j2 - l) * ab_dim1], &inca, &ab[ + l + 2 + (j2 - l) * ab_dim1], &inca, &rwork[j2 - m] + , &work[j2 - m], &ka1); +/* L330: */ + } + +/* apply rotations in 1st set from both sides to diagonal */ +/* blocks */ + + clar2v_(&nr, &ab[j2 * ab_dim1 + 1], &ab[(j2 + 1) * ab_dim1 + + 1], &ab[j2 * ab_dim1 + 2], &inca, &rwork[j2 - m], & + work[j2 - m], &ka1); + + clacgv_(&nr, &work[j2 - m], &ka1); + } + +/* start applying rotations in 1st set from the right */ + + i__1 = *kb - k + 1; + for (l = *ka - 1; l >= i__1; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + clartv_(&nrt, &ab[ka1 - l + 1 + j2 * ab_dim1], &inca, &ab[ + ka1 - l + (j2 + 1) * ab_dim1], &inca, &rwork[j2 - + m], &work[j2 - m], &ka1); + } +/* L340: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 1st set */ + + i__1 = j1; + i__3 = ka1; + for (j = j2; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { + i__2 = *n - m; + crot_(&i__2, &x[m + 1 + j * x_dim1], &c__1, &x[m + 1 + (j + + 1) * x_dim1], &c__1, &rwork[j - m], &work[j - m] + ); +/* L350: */ + } + } +/* L360: */ + } + + if (update) { + if (i2 <= *n && kbt > 0) { + +/* create nonzero element a(i-kbt+ka+1,i-kbt) outside the */ +/* band and store it in WORK(i-kbt) */ + + i__4 = i__ - kbt; + i__3 = kbt + 1 + (i__ - kbt) * bb_dim1; + q__2.r = -bb[i__3].r, q__2.i = -bb[i__3].i; + q__1.r = q__2.r * ra1.r - q__2.i * ra1.i, q__1.i = q__2.r * + ra1.i + q__2.i * ra1.r; + work[i__4].r = q__1.r, work[i__4].i = q__1.i; + } + } + + for (k = *kb; k >= 1; --k) { + if (update) { +/* Computing MAX */ + i__4 = 2, i__3 = k - i0 + 1; + j2 = i__ - k - 1 + f2cmax(i__4,i__3) * ka1; + } else { +/* Computing MAX */ + i__4 = 1, i__3 = k - i0 + 1; + j2 = i__ - k - 1 + f2cmax(i__4,i__3) * ka1; + } + +/* finish applying rotations in 2nd set from the right */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (*n - j2 + *ka + l) / ka1; + if (nrt > 0) { + clartv_(&nrt, &ab[ka1 - l + 1 + (j2 - *ka) * ab_dim1], & + inca, &ab[ka1 - l + (j2 - *ka + 1) * ab_dim1], & + inca, &rwork[j2 - *ka], &work[j2 - *ka], &ka1); + } +/* L370: */ + } + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + i__4 = j2; + i__3 = -ka1; + for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { + i__1 = j; + i__2 = j - *ka; + work[i__1].r = work[i__2].r, work[i__1].i = work[i__2].i; + rwork[j] = rwork[j - *ka]; +/* L380: */ + } + i__3 = j1; + i__4 = ka1; + for (j = j2; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { + +/* create nonzero element a(j+1,j-ka) outside the band */ +/* and store it in WORK(j) */ + + i__1 = j; + i__2 = j; + i__5 = ka1 + (j - *ka + 1) * ab_dim1; + q__1.r = work[i__2].r * ab[i__5].r - work[i__2].i * ab[i__5] + .i, q__1.i = work[i__2].r * ab[i__5].i + work[i__2].i + * ab[i__5].r; + work[i__1].r = q__1.r, work[i__1].i = q__1.i; + i__1 = ka1 + (j - *ka + 1) * ab_dim1; + i__2 = j; + i__5 = ka1 + (j - *ka + 1) * ab_dim1; + q__1.r = rwork[i__2] * ab[i__5].r, q__1.i = rwork[i__2] * ab[ + i__5].i; + ab[i__1].r = q__1.r, ab[i__1].i = q__1.i; +/* L390: */ + } + if (update) { + if (i__ - k < *n - *ka && k <= kbt) { + i__4 = i__ - k + *ka; + i__3 = i__ - k; + work[i__4].r = work[i__3].r, work[i__4].i = work[i__3].i; + } + } +/* L400: */ + } + + for (k = *kb; k >= 1; --k) { +/* Computing MAX */ + i__4 = 1, i__3 = k - i0 + 1; + j2 = i__ - k - 1 + f2cmax(i__4,i__3) * ka1; + nr = (*n - j2 + *ka) / ka1; + j1 = j2 + (nr - 1) * ka1; + if (nr > 0) { + +/* generate rotations in 2nd set to annihilate elements */ +/* which have been created outside the band */ + + clargv_(&nr, &ab[ka1 + (j2 - *ka) * ab_dim1], &inca, &work[j2] + , &ka1, &rwork[j2], &ka1); + +/* apply rotations in 2nd set from the left */ + + i__4 = *ka - 1; + for (l = 1; l <= i__4; ++l) { + clartv_(&nr, &ab[l + 1 + (j2 - l) * ab_dim1], &inca, &ab[ + l + 2 + (j2 - l) * ab_dim1], &inca, &rwork[j2], & + work[j2], &ka1); +/* L410: */ + } + +/* apply rotations in 2nd set from both sides to diagonal */ +/* blocks */ + + clar2v_(&nr, &ab[j2 * ab_dim1 + 1], &ab[(j2 + 1) * ab_dim1 + + 1], &ab[j2 * ab_dim1 + 2], &inca, &rwork[j2], &work[ + j2], &ka1); + + clacgv_(&nr, &work[j2], &ka1); + } + +/* start applying rotations in 2nd set from the right */ + + i__4 = *kb - k + 1; + for (l = *ka - 1; l >= i__4; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + clartv_(&nrt, &ab[ka1 - l + 1 + j2 * ab_dim1], &inca, &ab[ + ka1 - l + (j2 + 1) * ab_dim1], &inca, &rwork[j2], + &work[j2], &ka1); + } +/* L420: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 2nd set */ + + i__4 = j1; + i__3 = ka1; + for (j = j2; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { + i__1 = *n - m; + crot_(&i__1, &x[m + 1 + j * x_dim1], &c__1, &x[m + 1 + (j + + 1) * x_dim1], &c__1, &rwork[j], &work[j]); +/* L430: */ + } + } +/* L440: */ + } + + i__3 = *kb - 1; + for (k = 1; k <= i__3; ++k) { +/* Computing MAX */ + i__4 = 1, i__1 = k - i0 + 2; + j2 = i__ - k - 1 + f2cmax(i__4,i__1) * ka1; + +/* finish applying rotations in 1st set from the right */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (*n - j2 + l) / ka1; + if (nrt > 0) { + clartv_(&nrt, &ab[ka1 - l + 1 + j2 * ab_dim1], &inca, &ab[ + ka1 - l + (j2 + 1) * ab_dim1], &inca, &rwork[j2 - + m], &work[j2 - m], &ka1); + } +/* L450: */ + } +/* L460: */ + } + + if (*kb > 1) { + i__3 = j2 + *ka; + for (j = *n - 1; j >= i__3; --j) { + rwork[j - m] = rwork[j - *ka - m]; + i__4 = j - m; + i__1 = j - *ka - m; + work[i__4].r = work[i__1].r, work[i__4].i = work[i__1].i; +/* L470: */ + } + } + + } + + goto L10; + +L480: + +/* **************************** Phase 2 ***************************** */ + +/* The logical structure of this phase is: */ + +/* UPDATE = .TRUE. */ +/* DO I = 1, M */ +/* use S(i) to update A and create a new bulge */ +/* apply rotations to push all bulges KA positions upward */ +/* END DO */ +/* UPDATE = .FALSE. */ +/* DO I = M - KA - 1, 2, -1 */ +/* apply rotations to push all bulges KA positions upward */ +/* END DO */ + +/* To avoid duplicating code, the two loops are merged. */ + + update = TRUE_; + i__ = 0; +L490: + if (update) { + ++i__; +/* Computing MIN */ + i__3 = *kb, i__4 = m - i__; + kbt = f2cmin(i__3,i__4); + i0 = i__ + 1; +/* Computing MAX */ + i__3 = 1, i__4 = i__ - *ka; + i1 = f2cmax(i__3,i__4); + i2 = i__ + kbt - ka1; + if (i__ > m) { + update = FALSE_; + --i__; + i0 = m + 1; + if (*ka == 0) { + return 0; + } + goto L490; + } + } else { + i__ -= *ka; + if (i__ < 2) { + return 0; + } + } + + if (i__ < m - kbt) { + nx = m; + } else { + nx = *n; + } + + if (upper) { + +/* Transform A, working with the upper triangle */ + + if (update) { + +/* Form inv(S(i))**H * A * inv(S(i)) */ + + i__3 = kb1 + i__ * bb_dim1; + bii = bb[i__3].r; + i__3 = ka1 + i__ * ab_dim1; + i__4 = ka1 + i__ * ab_dim1; + r__1 = ab[i__4].r / bii / bii; + ab[i__3].r = r__1, ab[i__3].i = 0.f; + i__3 = i__ - 1; + for (j = i1; j <= i__3; ++j) { + i__4 = j - i__ + ka1 + i__ * ab_dim1; + i__1 = j - i__ + ka1 + i__ * ab_dim1; + q__1.r = ab[i__1].r / bii, q__1.i = ab[i__1].i / bii; + ab[i__4].r = q__1.r, ab[i__4].i = q__1.i; +/* L500: */ + } +/* Computing MIN */ + i__4 = *n, i__1 = i__ + *ka; + i__3 = f2cmin(i__4,i__1); + for (j = i__ + 1; j <= i__3; ++j) { + i__4 = i__ - j + ka1 + j * ab_dim1; + i__1 = i__ - j + ka1 + j * ab_dim1; + q__1.r = ab[i__1].r / bii, q__1.i = ab[i__1].i / bii; + ab[i__4].r = q__1.r, ab[i__4].i = q__1.i; +/* L510: */ + } + i__3 = i__ + kbt; + for (k = i__ + 1; k <= i__3; ++k) { + i__4 = i__ + kbt; + for (j = k; j <= i__4; ++j) { + i__1 = k - j + ka1 + j * ab_dim1; + i__2 = k - j + ka1 + j * ab_dim1; + i__5 = i__ - j + kb1 + j * bb_dim1; + r_cnjg(&q__5, &ab[i__ - k + ka1 + k * ab_dim1]); + q__4.r = bb[i__5].r * q__5.r - bb[i__5].i * q__5.i, + q__4.i = bb[i__5].r * q__5.i + bb[i__5].i * + q__5.r; + q__3.r = ab[i__2].r - q__4.r, q__3.i = ab[i__2].i - + q__4.i; + r_cnjg(&q__7, &bb[i__ - k + kb1 + k * bb_dim1]); + i__6 = i__ - j + ka1 + j * ab_dim1; + q__6.r = q__7.r * ab[i__6].r - q__7.i * ab[i__6].i, + q__6.i = q__7.r * ab[i__6].i + q__7.i * ab[i__6] + .r; + q__2.r = q__3.r - q__6.r, q__2.i = q__3.i - q__6.i; + i__7 = ka1 + i__ * ab_dim1; + r__1 = ab[i__7].r; + i__8 = i__ - j + kb1 + j * bb_dim1; + q__9.r = r__1 * bb[i__8].r, q__9.i = r__1 * bb[i__8].i; + r_cnjg(&q__10, &bb[i__ - k + kb1 + k * bb_dim1]); + q__8.r = q__9.r * q__10.r - q__9.i * q__10.i, q__8.i = + q__9.r * q__10.i + q__9.i * q__10.r; + q__1.r = q__2.r + q__8.r, q__1.i = q__2.i + q__8.i; + ab[i__1].r = q__1.r, ab[i__1].i = q__1.i; +/* L520: */ + } +/* Computing MIN */ + i__1 = *n, i__2 = i__ + *ka; + i__4 = f2cmin(i__1,i__2); + for (j = i__ + kbt + 1; j <= i__4; ++j) { + i__1 = k - j + ka1 + j * ab_dim1; + i__2 = k - j + ka1 + j * ab_dim1; + r_cnjg(&q__3, &bb[i__ - k + kb1 + k * bb_dim1]); + i__5 = i__ - j + ka1 + j * ab_dim1; + q__2.r = q__3.r * ab[i__5].r - q__3.i * ab[i__5].i, + q__2.i = q__3.r * ab[i__5].i + q__3.i * ab[i__5] + .r; + q__1.r = ab[i__2].r - q__2.r, q__1.i = ab[i__2].i - + q__2.i; + ab[i__1].r = q__1.r, ab[i__1].i = q__1.i; +/* L530: */ + } +/* L540: */ + } + i__3 = i__; + for (j = i1; j <= i__3; ++j) { +/* Computing MIN */ + i__1 = j + *ka, i__2 = i__ + kbt; + i__4 = f2cmin(i__1,i__2); + for (k = i__ + 1; k <= i__4; ++k) { + i__1 = j - k + ka1 + k * ab_dim1; + i__2 = j - k + ka1 + k * ab_dim1; + i__5 = i__ - k + kb1 + k * bb_dim1; + i__6 = j - i__ + ka1 + i__ * ab_dim1; + q__2.r = bb[i__5].r * ab[i__6].r - bb[i__5].i * ab[i__6] + .i, q__2.i = bb[i__5].r * ab[i__6].i + bb[i__5].i + * ab[i__6].r; + q__1.r = ab[i__2].r - q__2.r, q__1.i = ab[i__2].i - + q__2.i; + ab[i__1].r = q__1.r, ab[i__1].i = q__1.i; +/* L550: */ + } +/* L560: */ + } + + if (wantx) { + +/* post-multiply X by inv(S(i)) */ + + r__1 = 1.f / bii; + csscal_(&nx, &r__1, &x[i__ * x_dim1 + 1], &c__1); + if (kbt > 0) { + q__1.r = -1.f, q__1.i = 0.f; + i__3 = *ldbb - 1; + cgeru_(&nx, &kbt, &q__1, &x[i__ * x_dim1 + 1], &c__1, &bb[ + *kb + (i__ + 1) * bb_dim1], &i__3, &x[(i__ + 1) * + x_dim1 + 1], ldx); + } + } + +/* store a(i1,i) in RA1 for use in next loop over K */ + + i__3 = i1 - i__ + ka1 + i__ * ab_dim1; + ra1.r = ab[i__3].r, ra1.i = ab[i__3].i; + } + +/* Generate and apply vectors of rotations to chase all the */ +/* existing bulges KA positions up toward the top of the band */ + + i__3 = *kb - 1; + for (k = 1; k <= i__3; ++k) { + if (update) { + +/* Determine the rotations which would annihilate the bulge */ +/* which has in theory just been created */ + + if (i__ + k - ka1 > 0 && i__ + k < m) { + +/* generate rotation to annihilate a(i+k-ka-1,i) */ + + clartg_(&ab[k + 1 + i__ * ab_dim1], &ra1, &rwork[i__ + k + - *ka], &work[i__ + k - *ka], &ra); + +/* create nonzero element a(i+k-ka-1,i+k) outside the */ +/* band and store it in WORK(m-kb+i+k) */ + + i__4 = kb1 - k + (i__ + k) * bb_dim1; + q__2.r = -bb[i__4].r, q__2.i = -bb[i__4].i; + q__1.r = q__2.r * ra1.r - q__2.i * ra1.i, q__1.i = q__2.r + * ra1.i + q__2.i * ra1.r; + t.r = q__1.r, t.i = q__1.i; + i__4 = m - *kb + i__ + k; + i__1 = i__ + k - *ka; + q__2.r = rwork[i__1] * t.r, q__2.i = rwork[i__1] * t.i; + r_cnjg(&q__4, &work[i__ + k - *ka]); + i__2 = (i__ + k) * ab_dim1 + 1; + q__3.r = q__4.r * ab[i__2].r - q__4.i * ab[i__2].i, + q__3.i = q__4.r * ab[i__2].i + q__4.i * ab[i__2] + .r; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; + work[i__4].r = q__1.r, work[i__4].i = q__1.i; + i__4 = (i__ + k) * ab_dim1 + 1; + i__1 = i__ + k - *ka; + q__2.r = work[i__1].r * t.r - work[i__1].i * t.i, q__2.i = + work[i__1].r * t.i + work[i__1].i * t.r; + i__2 = i__ + k - *ka; + i__5 = (i__ + k) * ab_dim1 + 1; + q__3.r = rwork[i__2] * ab[i__5].r, q__3.i = rwork[i__2] * + ab[i__5].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + ab[i__4].r = q__1.r, ab[i__4].i = q__1.i; + ra1.r = ra.r, ra1.i = ra.i; + } + } +/* Computing MAX */ + i__4 = 1, i__1 = k + i0 - m + 1; + j2 = i__ + k + 1 - f2cmax(i__4,i__1) * ka1; + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + if (update) { +/* Computing MIN */ + i__4 = j2, i__1 = i__ - (*ka << 1) + k - 1; + j2t = f2cmin(i__4,i__1); + } else { + j2t = j2; + } + nrt = (j2t + *ka - 1) / ka1; + i__4 = j2t; + i__1 = ka1; + for (j = j1; i__1 < 0 ? j >= i__4 : j <= i__4; j += i__1) { + +/* create nonzero element a(j-1,j+ka) outside the band */ +/* and store it in WORK(j) */ + + i__2 = j; + i__5 = j; + i__6 = (j + *ka - 1) * ab_dim1 + 1; + q__1.r = work[i__5].r * ab[i__6].r - work[i__5].i * ab[i__6] + .i, q__1.i = work[i__5].r * ab[i__6].i + work[i__5].i + * ab[i__6].r; + work[i__2].r = q__1.r, work[i__2].i = q__1.i; + i__2 = (j + *ka - 1) * ab_dim1 + 1; + i__5 = j; + i__6 = (j + *ka - 1) * ab_dim1 + 1; + q__1.r = rwork[i__5] * ab[i__6].r, q__1.i = rwork[i__5] * ab[ + i__6].i; + ab[i__2].r = q__1.r, ab[i__2].i = q__1.i; +/* L570: */ + } + +/* generate rotations in 1st set to annihilate elements which */ +/* have been created outside the band */ + + if (nrt > 0) { + clargv_(&nrt, &ab[(j1 + *ka) * ab_dim1 + 1], &inca, &work[j1], + &ka1, &rwork[j1], &ka1); + } + if (nr > 0) { + +/* apply rotations in 1st set from the left */ + + i__1 = *ka - 1; + for (l = 1; l <= i__1; ++l) { + clartv_(&nr, &ab[ka1 - l + (j1 + l) * ab_dim1], &inca, & + ab[*ka - l + (j1 + l) * ab_dim1], &inca, &rwork[ + j1], &work[j1], &ka1); +/* L580: */ + } + +/* apply rotations in 1st set from both sides to diagonal */ +/* blocks */ + + clar2v_(&nr, &ab[ka1 + j1 * ab_dim1], &ab[ka1 + (j1 - 1) * + ab_dim1], &ab[*ka + j1 * ab_dim1], &inca, &rwork[j1], + &work[j1], &ka1); + + clacgv_(&nr, &work[j1], &ka1); + } + +/* start applying rotations in 1st set from the right */ + + i__1 = *kb - k + 1; + for (l = *ka - 1; l >= i__1; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + clartv_(&nrt, &ab[l + j1t * ab_dim1], &inca, &ab[l + 1 + ( + j1t - 1) * ab_dim1], &inca, &rwork[j1t], &work[ + j1t], &ka1); + } +/* L590: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 1st set */ + + i__1 = j2; + i__4 = ka1; + for (j = j1; i__4 < 0 ? j >= i__1 : j <= i__1; j += i__4) { + crot_(&nx, &x[j * x_dim1 + 1], &c__1, &x[(j - 1) * x_dim1 + + 1], &c__1, &rwork[j], &work[j]); +/* L600: */ + } + } +/* L610: */ + } + + if (update) { + if (i2 > 0 && kbt > 0) { + +/* create nonzero element a(i+kbt-ka-1,i+kbt) outside the */ +/* band and store it in WORK(m-kb+i+kbt) */ + + i__3 = m - *kb + i__ + kbt; + i__4 = kb1 - kbt + (i__ + kbt) * bb_dim1; + q__2.r = -bb[i__4].r, q__2.i = -bb[i__4].i; + q__1.r = q__2.r * ra1.r - q__2.i * ra1.i, q__1.i = q__2.r * + ra1.i + q__2.i * ra1.r; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; + } + } + + for (k = *kb; k >= 1; --k) { + if (update) { +/* Computing MAX */ + i__3 = 2, i__4 = k + i0 - m; + j2 = i__ + k + 1 - f2cmax(i__3,i__4) * ka1; + } else { +/* Computing MAX */ + i__3 = 1, i__4 = k + i0 - m; + j2 = i__ + k + 1 - f2cmax(i__3,i__4) * ka1; + } + +/* finish applying rotations in 2nd set from the right */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (j2 + *ka + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + clartv_(&nrt, &ab[l + (j1t + *ka) * ab_dim1], &inca, &ab[ + l + 1 + (j1t + *ka - 1) * ab_dim1], &inca, &rwork[ + m - *kb + j1t + *ka], &work[m - *kb + j1t + *ka], + &ka1); + } +/* L620: */ + } + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + i__3 = j2; + i__4 = ka1; + for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { + i__1 = m - *kb + j; + i__2 = m - *kb + j + *ka; + work[i__1].r = work[i__2].r, work[i__1].i = work[i__2].i; + rwork[m - *kb + j] = rwork[m - *kb + j + *ka]; +/* L630: */ + } + i__4 = j2; + i__3 = ka1; + for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { + +/* create nonzero element a(j-1,j+ka) outside the band */ +/* and store it in WORK(m-kb+j) */ + + i__1 = m - *kb + j; + i__2 = m - *kb + j; + i__5 = (j + *ka - 1) * ab_dim1 + 1; + q__1.r = work[i__2].r * ab[i__5].r - work[i__2].i * ab[i__5] + .i, q__1.i = work[i__2].r * ab[i__5].i + work[i__2].i + * ab[i__5].r; + work[i__1].r = q__1.r, work[i__1].i = q__1.i; + i__1 = (j + *ka - 1) * ab_dim1 + 1; + i__2 = m - *kb + j; + i__5 = (j + *ka - 1) * ab_dim1 + 1; + q__1.r = rwork[i__2] * ab[i__5].r, q__1.i = rwork[i__2] * ab[ + i__5].i; + ab[i__1].r = q__1.r, ab[i__1].i = q__1.i; +/* L640: */ + } + if (update) { + if (i__ + k > ka1 && k <= kbt) { + i__3 = m - *kb + i__ + k - *ka; + i__4 = m - *kb + i__ + k; + work[i__3].r = work[i__4].r, work[i__3].i = work[i__4].i; + } + } +/* L650: */ + } + + for (k = *kb; k >= 1; --k) { +/* Computing MAX */ + i__3 = 1, i__4 = k + i0 - m; + j2 = i__ + k + 1 - f2cmax(i__3,i__4) * ka1; + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + if (nr > 0) { + +/* generate rotations in 2nd set to annihilate elements */ +/* which have been created outside the band */ + + clargv_(&nr, &ab[(j1 + *ka) * ab_dim1 + 1], &inca, &work[m - * + kb + j1], &ka1, &rwork[m - *kb + j1], &ka1); + +/* apply rotations in 2nd set from the left */ + + i__3 = *ka - 1; + for (l = 1; l <= i__3; ++l) { + clartv_(&nr, &ab[ka1 - l + (j1 + l) * ab_dim1], &inca, & + ab[*ka - l + (j1 + l) * ab_dim1], &inca, &rwork[m + - *kb + j1], &work[m - *kb + j1], &ka1); +/* L660: */ + } + +/* apply rotations in 2nd set from both sides to diagonal */ +/* blocks */ + + clar2v_(&nr, &ab[ka1 + j1 * ab_dim1], &ab[ka1 + (j1 - 1) * + ab_dim1], &ab[*ka + j1 * ab_dim1], &inca, &rwork[m - * + kb + j1], &work[m - *kb + j1], &ka1); + + clacgv_(&nr, &work[m - *kb + j1], &ka1); + } + +/* start applying rotations in 2nd set from the right */ + + i__3 = *kb - k + 1; + for (l = *ka - 1; l >= i__3; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + clartv_(&nrt, &ab[l + j1t * ab_dim1], &inca, &ab[l + 1 + ( + j1t - 1) * ab_dim1], &inca, &rwork[m - *kb + j1t], + &work[m - *kb + j1t], &ka1); + } +/* L670: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 2nd set */ + + i__3 = j2; + i__4 = ka1; + for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { + crot_(&nx, &x[j * x_dim1 + 1], &c__1, &x[(j - 1) * x_dim1 + + 1], &c__1, &rwork[m - *kb + j], &work[m - *kb + + j]); +/* L680: */ + } + } +/* L690: */ + } + + i__4 = *kb - 1; + for (k = 1; k <= i__4; ++k) { +/* Computing MAX */ + i__3 = 1, i__1 = k + i0 - m + 1; + j2 = i__ + k + 1 - f2cmax(i__3,i__1) * ka1; + +/* finish applying rotations in 1st set from the right */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + clartv_(&nrt, &ab[l + j1t * ab_dim1], &inca, &ab[l + 1 + ( + j1t - 1) * ab_dim1], &inca, &rwork[j1t], &work[ + j1t], &ka1); + } +/* L700: */ + } +/* L710: */ + } + + if (*kb > 1) { + i__4 = i2 - *ka; + for (j = 2; j <= i__4; ++j) { + rwork[j] = rwork[j + *ka]; + i__3 = j; + i__1 = j + *ka; + work[i__3].r = work[i__1].r, work[i__3].i = work[i__1].i; +/* L720: */ + } + } + + } else { + +/* Transform A, working with the lower triangle */ + + if (update) { + +/* Form inv(S(i))**H * A * inv(S(i)) */ + + i__4 = i__ * bb_dim1 + 1; + bii = bb[i__4].r; + i__4 = i__ * ab_dim1 + 1; + i__3 = i__ * ab_dim1 + 1; + r__1 = ab[i__3].r / bii / bii; + ab[i__4].r = r__1, ab[i__4].i = 0.f; + i__4 = i__ - 1; + for (j = i1; j <= i__4; ++j) { + i__3 = i__ - j + 1 + j * ab_dim1; + i__1 = i__ - j + 1 + j * ab_dim1; + q__1.r = ab[i__1].r / bii, q__1.i = ab[i__1].i / bii; + ab[i__3].r = q__1.r, ab[i__3].i = q__1.i; +/* L730: */ + } +/* Computing MIN */ + i__3 = *n, i__1 = i__ + *ka; + i__4 = f2cmin(i__3,i__1); + for (j = i__ + 1; j <= i__4; ++j) { + i__3 = j - i__ + 1 + i__ * ab_dim1; + i__1 = j - i__ + 1 + i__ * ab_dim1; + q__1.r = ab[i__1].r / bii, q__1.i = ab[i__1].i / bii; + ab[i__3].r = q__1.r, ab[i__3].i = q__1.i; +/* L740: */ + } + i__4 = i__ + kbt; + for (k = i__ + 1; k <= i__4; ++k) { + i__3 = i__ + kbt; + for (j = k; j <= i__3; ++j) { + i__1 = j - k + 1 + k * ab_dim1; + i__2 = j - k + 1 + k * ab_dim1; + i__5 = j - i__ + 1 + i__ * bb_dim1; + r_cnjg(&q__5, &ab[k - i__ + 1 + i__ * ab_dim1]); + q__4.r = bb[i__5].r * q__5.r - bb[i__5].i * q__5.i, + q__4.i = bb[i__5].r * q__5.i + bb[i__5].i * + q__5.r; + q__3.r = ab[i__2].r - q__4.r, q__3.i = ab[i__2].i - + q__4.i; + r_cnjg(&q__7, &bb[k - i__ + 1 + i__ * bb_dim1]); + i__6 = j - i__ + 1 + i__ * ab_dim1; + q__6.r = q__7.r * ab[i__6].r - q__7.i * ab[i__6].i, + q__6.i = q__7.r * ab[i__6].i + q__7.i * ab[i__6] + .r; + q__2.r = q__3.r - q__6.r, q__2.i = q__3.i - q__6.i; + i__7 = i__ * ab_dim1 + 1; + r__1 = ab[i__7].r; + i__8 = j - i__ + 1 + i__ * bb_dim1; + q__9.r = r__1 * bb[i__8].r, q__9.i = r__1 * bb[i__8].i; + r_cnjg(&q__10, &bb[k - i__ + 1 + i__ * bb_dim1]); + q__8.r = q__9.r * q__10.r - q__9.i * q__10.i, q__8.i = + q__9.r * q__10.i + q__9.i * q__10.r; + q__1.r = q__2.r + q__8.r, q__1.i = q__2.i + q__8.i; + ab[i__1].r = q__1.r, ab[i__1].i = q__1.i; +/* L750: */ + } +/* Computing MIN */ + i__1 = *n, i__2 = i__ + *ka; + i__3 = f2cmin(i__1,i__2); + for (j = i__ + kbt + 1; j <= i__3; ++j) { + i__1 = j - k + 1 + k * ab_dim1; + i__2 = j - k + 1 + k * ab_dim1; + r_cnjg(&q__3, &bb[k - i__ + 1 + i__ * bb_dim1]); + i__5 = j - i__ + 1 + i__ * ab_dim1; + q__2.r = q__3.r * ab[i__5].r - q__3.i * ab[i__5].i, + q__2.i = q__3.r * ab[i__5].i + q__3.i * ab[i__5] + .r; + q__1.r = ab[i__2].r - q__2.r, q__1.i = ab[i__2].i - + q__2.i; + ab[i__1].r = q__1.r, ab[i__1].i = q__1.i; +/* L760: */ + } +/* L770: */ + } + i__4 = i__; + for (j = i1; j <= i__4; ++j) { +/* Computing MIN */ + i__1 = j + *ka, i__2 = i__ + kbt; + i__3 = f2cmin(i__1,i__2); + for (k = i__ + 1; k <= i__3; ++k) { + i__1 = k - j + 1 + j * ab_dim1; + i__2 = k - j + 1 + j * ab_dim1; + i__5 = k - i__ + 1 + i__ * bb_dim1; + i__6 = i__ - j + 1 + j * ab_dim1; + q__2.r = bb[i__5].r * ab[i__6].r - bb[i__5].i * ab[i__6] + .i, q__2.i = bb[i__5].r * ab[i__6].i + bb[i__5].i + * ab[i__6].r; + q__1.r = ab[i__2].r - q__2.r, q__1.i = ab[i__2].i - + q__2.i; + ab[i__1].r = q__1.r, ab[i__1].i = q__1.i; +/* L780: */ + } +/* L790: */ + } + + if (wantx) { + +/* post-multiply X by inv(S(i)) */ + + r__1 = 1.f / bii; + csscal_(&nx, &r__1, &x[i__ * x_dim1 + 1], &c__1); + if (kbt > 0) { + q__1.r = -1.f, q__1.i = 0.f; + cgerc_(&nx, &kbt, &q__1, &x[i__ * x_dim1 + 1], &c__1, &bb[ + i__ * bb_dim1 + 2], &c__1, &x[(i__ + 1) * x_dim1 + + 1], ldx); + } + } + +/* store a(i,i1) in RA1 for use in next loop over K */ + + i__4 = i__ - i1 + 1 + i1 * ab_dim1; + ra1.r = ab[i__4].r, ra1.i = ab[i__4].i; + } + +/* Generate and apply vectors of rotations to chase all the */ +/* existing bulges KA positions up toward the top of the band */ + + i__4 = *kb - 1; + for (k = 1; k <= i__4; ++k) { + if (update) { + +/* Determine the rotations which would annihilate the bulge */ +/* which has in theory just been created */ + + if (i__ + k - ka1 > 0 && i__ + k < m) { + +/* generate rotation to annihilate a(i,i+k-ka-1) */ + + clartg_(&ab[ka1 - k + (i__ + k - *ka) * ab_dim1], &ra1, & + rwork[i__ + k - *ka], &work[i__ + k - *ka], &ra); + +/* create nonzero element a(i+k,i+k-ka-1) outside the */ +/* band and store it in WORK(m-kb+i+k) */ + + i__3 = k + 1 + i__ * bb_dim1; + q__2.r = -bb[i__3].r, q__2.i = -bb[i__3].i; + q__1.r = q__2.r * ra1.r - q__2.i * ra1.i, q__1.i = q__2.r + * ra1.i + q__2.i * ra1.r; + t.r = q__1.r, t.i = q__1.i; + i__3 = m - *kb + i__ + k; + i__1 = i__ + k - *ka; + q__2.r = rwork[i__1] * t.r, q__2.i = rwork[i__1] * t.i; + r_cnjg(&q__4, &work[i__ + k - *ka]); + i__2 = ka1 + (i__ + k - *ka) * ab_dim1; + q__3.r = q__4.r * ab[i__2].r - q__4.i * ab[i__2].i, + q__3.i = q__4.r * ab[i__2].i + q__4.i * ab[i__2] + .r; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; + i__3 = ka1 + (i__ + k - *ka) * ab_dim1; + i__1 = i__ + k - *ka; + q__2.r = work[i__1].r * t.r - work[i__1].i * t.i, q__2.i = + work[i__1].r * t.i + work[i__1].i * t.r; + i__2 = i__ + k - *ka; + i__5 = ka1 + (i__ + k - *ka) * ab_dim1; + q__3.r = rwork[i__2] * ab[i__5].r, q__3.i = rwork[i__2] * + ab[i__5].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + ab[i__3].r = q__1.r, ab[i__3].i = q__1.i; + ra1.r = ra.r, ra1.i = ra.i; + } + } +/* Computing MAX */ + i__3 = 1, i__1 = k + i0 - m + 1; + j2 = i__ + k + 1 - f2cmax(i__3,i__1) * ka1; + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + if (update) { +/* Computing MIN */ + i__3 = j2, i__1 = i__ - (*ka << 1) + k - 1; + j2t = f2cmin(i__3,i__1); + } else { + j2t = j2; + } + nrt = (j2t + *ka - 1) / ka1; + i__3 = j2t; + i__1 = ka1; + for (j = j1; i__1 < 0 ? j >= i__3 : j <= i__3; j += i__1) { + +/* create nonzero element a(j+ka,j-1) outside the band */ +/* and store it in WORK(j) */ + + i__2 = j; + i__5 = j; + i__6 = ka1 + (j - 1) * ab_dim1; + q__1.r = work[i__5].r * ab[i__6].r - work[i__5].i * ab[i__6] + .i, q__1.i = work[i__5].r * ab[i__6].i + work[i__5].i + * ab[i__6].r; + work[i__2].r = q__1.r, work[i__2].i = q__1.i; + i__2 = ka1 + (j - 1) * ab_dim1; + i__5 = j; + i__6 = ka1 + (j - 1) * ab_dim1; + q__1.r = rwork[i__5] * ab[i__6].r, q__1.i = rwork[i__5] * ab[ + i__6].i; + ab[i__2].r = q__1.r, ab[i__2].i = q__1.i; +/* L800: */ + } + +/* generate rotations in 1st set to annihilate elements which */ +/* have been created outside the band */ + + if (nrt > 0) { + clargv_(&nrt, &ab[ka1 + j1 * ab_dim1], &inca, &work[j1], &ka1, + &rwork[j1], &ka1); + } + if (nr > 0) { + +/* apply rotations in 1st set from the right */ + + i__1 = *ka - 1; + for (l = 1; l <= i__1; ++l) { + clartv_(&nr, &ab[l + 1 + j1 * ab_dim1], &inca, &ab[l + 2 + + (j1 - 1) * ab_dim1], &inca, &rwork[j1], &work[ + j1], &ka1); +/* L810: */ + } + +/* apply rotations in 1st set from both sides to diagonal */ +/* blocks */ + + clar2v_(&nr, &ab[j1 * ab_dim1 + 1], &ab[(j1 - 1) * ab_dim1 + + 1], &ab[(j1 - 1) * ab_dim1 + 2], &inca, &rwork[j1], & + work[j1], &ka1); + + clacgv_(&nr, &work[j1], &ka1); + } + +/* start applying rotations in 1st set from the left */ + + i__1 = *kb - k + 1; + for (l = *ka - 1; l >= i__1; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + clartv_(&nrt, &ab[ka1 - l + 1 + (j1t - ka1 + l) * ab_dim1] + , &inca, &ab[ka1 - l + (j1t - ka1 + l) * ab_dim1], + &inca, &rwork[j1t], &work[j1t], &ka1); + } +/* L820: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 1st set */ + + i__1 = j2; + i__3 = ka1; + for (j = j1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { + r_cnjg(&q__1, &work[j]); + crot_(&nx, &x[j * x_dim1 + 1], &c__1, &x[(j - 1) * x_dim1 + + 1], &c__1, &rwork[j], &q__1); +/* L830: */ + } + } +/* L840: */ + } + + if (update) { + if (i2 > 0 && kbt > 0) { + +/* create nonzero element a(i+kbt,i+kbt-ka-1) outside the */ +/* band and store it in WORK(m-kb+i+kbt) */ + + i__4 = m - *kb + i__ + kbt; + i__3 = kbt + 1 + i__ * bb_dim1; + q__2.r = -bb[i__3].r, q__2.i = -bb[i__3].i; + q__1.r = q__2.r * ra1.r - q__2.i * ra1.i, q__1.i = q__2.r * + ra1.i + q__2.i * ra1.r; + work[i__4].r = q__1.r, work[i__4].i = q__1.i; + } + } + + for (k = *kb; k >= 1; --k) { + if (update) { +/* Computing MAX */ + i__4 = 2, i__3 = k + i0 - m; + j2 = i__ + k + 1 - f2cmax(i__4,i__3) * ka1; + } else { +/* Computing MAX */ + i__4 = 1, i__3 = k + i0 - m; + j2 = i__ + k + 1 - f2cmax(i__4,i__3) * ka1; + } + +/* finish applying rotations in 2nd set from the left */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (j2 + *ka + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + clartv_(&nrt, &ab[ka1 - l + 1 + (j1t + l - 1) * ab_dim1], + &inca, &ab[ka1 - l + (j1t + l - 1) * ab_dim1], & + inca, &rwork[m - *kb + j1t + *ka], &work[m - *kb + + j1t + *ka], &ka1); + } +/* L850: */ + } + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + i__4 = j2; + i__3 = ka1; + for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { + i__1 = m - *kb + j; + i__2 = m - *kb + j + *ka; + work[i__1].r = work[i__2].r, work[i__1].i = work[i__2].i; + rwork[m - *kb + j] = rwork[m - *kb + j + *ka]; +/* L860: */ + } + i__3 = j2; + i__4 = ka1; + for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { + +/* create nonzero element a(j+ka,j-1) outside the band */ +/* and store it in WORK(m-kb+j) */ + + i__1 = m - *kb + j; + i__2 = m - *kb + j; + i__5 = ka1 + (j - 1) * ab_dim1; + q__1.r = work[i__2].r * ab[i__5].r - work[i__2].i * ab[i__5] + .i, q__1.i = work[i__2].r * ab[i__5].i + work[i__2].i + * ab[i__5].r; + work[i__1].r = q__1.r, work[i__1].i = q__1.i; + i__1 = ka1 + (j - 1) * ab_dim1; + i__2 = m - *kb + j; + i__5 = ka1 + (j - 1) * ab_dim1; + q__1.r = rwork[i__2] * ab[i__5].r, q__1.i = rwork[i__2] * ab[ + i__5].i; + ab[i__1].r = q__1.r, ab[i__1].i = q__1.i; +/* L870: */ + } + if (update) { + if (i__ + k > ka1 && k <= kbt) { + i__4 = m - *kb + i__ + k - *ka; + i__3 = m - *kb + i__ + k; + work[i__4].r = work[i__3].r, work[i__4].i = work[i__3].i; + } + } +/* L880: */ + } + + for (k = *kb; k >= 1; --k) { +/* Computing MAX */ + i__4 = 1, i__3 = k + i0 - m; + j2 = i__ + k + 1 - f2cmax(i__4,i__3) * ka1; + nr = (j2 + *ka - 1) / ka1; + j1 = j2 - (nr - 1) * ka1; + if (nr > 0) { + +/* generate rotations in 2nd set to annihilate elements */ +/* which have been created outside the band */ + + clargv_(&nr, &ab[ka1 + j1 * ab_dim1], &inca, &work[m - *kb + + j1], &ka1, &rwork[m - *kb + j1], &ka1); + +/* apply rotations in 2nd set from the right */ + + i__4 = *ka - 1; + for (l = 1; l <= i__4; ++l) { + clartv_(&nr, &ab[l + 1 + j1 * ab_dim1], &inca, &ab[l + 2 + + (j1 - 1) * ab_dim1], &inca, &rwork[m - *kb + j1] + , &work[m - *kb + j1], &ka1); +/* L890: */ + } + +/* apply rotations in 2nd set from both sides to diagonal */ +/* blocks */ + + clar2v_(&nr, &ab[j1 * ab_dim1 + 1], &ab[(j1 - 1) * ab_dim1 + + 1], &ab[(j1 - 1) * ab_dim1 + 2], &inca, &rwork[m - * + kb + j1], &work[m - *kb + j1], &ka1); + + clacgv_(&nr, &work[m - *kb + j1], &ka1); + } + +/* start applying rotations in 2nd set from the left */ + + i__4 = *kb - k + 1; + for (l = *ka - 1; l >= i__4; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + clartv_(&nrt, &ab[ka1 - l + 1 + (j1t - ka1 + l) * ab_dim1] + , &inca, &ab[ka1 - l + (j1t - ka1 + l) * ab_dim1], + &inca, &rwork[m - *kb + j1t], &work[m - *kb + + j1t], &ka1); + } +/* L900: */ + } + + if (wantx) { + +/* post-multiply X by product of rotations in 2nd set */ + + i__4 = j2; + i__3 = ka1; + for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { + r_cnjg(&q__1, &work[m - *kb + j]); + crot_(&nx, &x[j * x_dim1 + 1], &c__1, &x[(j - 1) * x_dim1 + + 1], &c__1, &rwork[m - *kb + j], &q__1); +/* L910: */ + } + } +/* L920: */ + } + + i__3 = *kb - 1; + for (k = 1; k <= i__3; ++k) { +/* Computing MAX */ + i__4 = 1, i__1 = k + i0 - m + 1; + j2 = i__ + k + 1 - f2cmax(i__4,i__1) * ka1; + +/* finish applying rotations in 1st set from the left */ + + for (l = *kb - k; l >= 1; --l) { + nrt = (j2 + l - 1) / ka1; + j1t = j2 - (nrt - 1) * ka1; + if (nrt > 0) { + clartv_(&nrt, &ab[ka1 - l + 1 + (j1t - ka1 + l) * ab_dim1] + , &inca, &ab[ka1 - l + (j1t - ka1 + l) * ab_dim1], + &inca, &rwork[j1t], &work[j1t], &ka1); + } +/* L930: */ + } +/* L940: */ + } + + if (*kb > 1) { + i__3 = i2 - *ka; + for (j = 2; j <= i__3; ++j) { + rwork[j] = rwork[j + *ka]; + i__4 = j; + i__1 = j + *ka; + work[i__4].r = work[i__1].r, work[i__4].i = work[i__1].i; +/* L950: */ + } + } + + } + + goto L490; + +/* End of CHBGST */ + +} /* chbgst_ */ + diff --git a/lapack-netlib/SRC/chbgv.c b/lapack-netlib/SRC/chbgv.c new file mode 100644 index 000000000..efe71a1cf --- /dev/null +++ b/lapack-netlib/SRC/chbgv.c @@ -0,0 +1,693 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHBGV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHBGV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, */ +/* LDZ, WORK, RWORK, INFO ) */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, N */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX AB( LDAB, * ), BB( LDBB, * ), WORK( * ), */ +/* $ Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHBGV computes all the eigenvalues, and optionally, the eigenvectors */ +/* > of a complex generalized Hermitian-definite banded eigenproblem, of */ +/* > the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian */ +/* > and banded, and B is also positive definite. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangles of A and B are stored; */ +/* > = 'L': Lower triangles of A and B are stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KA */ +/* > \verbatim */ +/* > KA is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KA >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KB */ +/* > \verbatim */ +/* > KB is INTEGER */ +/* > The number of superdiagonals of the matrix B if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KB >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first ka+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for f2cmax(1,j-ka)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+ka). */ +/* > */ +/* > On exit, the contents of AB are destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KA+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] BB */ +/* > \verbatim */ +/* > BB is COMPLEX array, dimension (LDBB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix B, stored in the first kb+1 rows of the array. The */ +/* > j-th column of B is stored in the j-th column of the array BB */ +/* > as follows: */ +/* > if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for f2cmax(1,j-kb)<=i<=j; */ +/* > if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=f2cmin(n,j+kb). */ +/* > */ +/* > On exit, the factor S from the split Cholesky factorization */ +/* > B = S**H*S, as returned by CPBSTF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDBB */ +/* > \verbatim */ +/* > LDBB is INTEGER */ +/* > The leading dimension of the array BB. LDBB >= KB+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX array, dimension (LDZ, N) */ +/* > If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */ +/* > eigenvectors, with the i-th column of Z holding the */ +/* > eigenvector associated with W(i). The eigenvectors are */ +/* > normalized so that Z**H*B*Z = I. */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (3*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is: */ +/* > <= N: the algorithm failed to converge: */ +/* > i off-diagonal elements of an intermediate */ +/* > tridiagonal form did not converge to zero; */ +/* > > N: if INFO = N + i, for 1 <= i <= N, then CPBSTF */ +/* > returned INFO = i: B is not positive definite. */ +/* > The factorization of B could not be completed and */ +/* > no eigenvalues or eigenvectors were computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexOTHEReigen */ + +/* ===================================================================== */ +/* Subroutine */ int chbgv_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, + real *w, complex *z__, integer *ldz, complex *work, real *rwork, + integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, bb_dim1, bb_offset, z_dim1, z_offset, i__1; + + /* Local variables */ + integer inde; + char vect[1]; + extern logical lsame_(char *, char *); + integer iinfo; + logical upper, wantz; + extern /* Subroutine */ int chbtrd_(char *, char *, integer *, integer *, + complex *, integer *, real *, real *, complex *, integer *, + complex *, integer *), chbgst_(char *, char *, + integer *, integer *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *, complex *, real *, integer *), xerbla_(char *, integer *, ftnlen), cpbstf_(char + *, integer *, integer *, complex *, integer *, integer *); + integer indwrk; + extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, + complex *, integer *, real *, integer *), ssterf_(integer + *, real *, real *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + bb_dim1 = *ldbb; + bb_offset = 1 + bb_dim1 * 1; + bb -= bb_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + upper = lsame_(uplo, "U"); + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (upper || lsame_(uplo, "L"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ka < 0) { + *info = -4; + } else if (*kb < 0 || *kb > *ka) { + *info = -5; + } else if (*ldab < *ka + 1) { + *info = -7; + } else if (*ldbb < *kb + 1) { + *info = -9; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHBGV ", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a split Cholesky factorization of B. */ + + cpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem. */ + + inde = 1; + indwrk = inde + *n; + chbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb, + &z__[z_offset], ldz, &work[1], &rwork[indwrk], &iinfo); + +/* Reduce to tridiagonal form. */ + + if (wantz) { + *(unsigned char *)vect = 'U'; + } else { + *(unsigned char *)vect = 'N'; + } + chbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &w[1], &rwork[inde], & + z__[z_offset], ldz, &work[1], &iinfo); + +/* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEQR. */ + + if (! wantz) { + ssterf_(n, &w[1], &rwork[inde], info); + } else { + csteqr_(jobz, n, &w[1], &rwork[inde], &z__[z_offset], ldz, &rwork[ + indwrk], info); + } + return 0; + +/* End of CHBGV */ + +} /* chbgv_ */ + diff --git a/lapack-netlib/SRC/chbgvd.c b/lapack-netlib/SRC/chbgvd.c new file mode 100644 index 000000000..0a6038608 --- /dev/null +++ b/lapack-netlib/SRC/chbgvd.c @@ -0,0 +1,825 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHBGVD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHBGVD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, */ +/* Z, LDZ, WORK, LWORK, RWORK, LRWORK, IWORK, */ +/* LIWORK, INFO ) */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LRWORK, */ +/* $ LWORK, N */ +/* INTEGER IWORK( * ) */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX AB( LDAB, * ), BB( LDBB, * ), WORK( * ), */ +/* $ Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHBGVD computes all the eigenvalues, and optionally, the eigenvectors */ +/* > of a complex generalized Hermitian-definite banded eigenproblem, of */ +/* > the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian */ +/* > and banded, and B is also positive definite. If eigenvectors are */ +/* > desired, it uses a divide and conquer algorithm. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangles of A and B are stored; */ +/* > = 'L': Lower triangles of A and B are stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KA */ +/* > \verbatim */ +/* > KA is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KA >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KB */ +/* > \verbatim */ +/* > KB is INTEGER */ +/* > The number of superdiagonals of the matrix B if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KB >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first ka+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for f2cmax(1,j-ka)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+ka). */ +/* > */ +/* > On exit, the contents of AB are destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KA+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] BB */ +/* > \verbatim */ +/* > BB is COMPLEX array, dimension (LDBB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix B, stored in the first kb+1 rows of the array. The */ +/* > j-th column of B is stored in the j-th column of the array BB */ +/* > as follows: */ +/* > if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for f2cmax(1,j-kb)<=i<=j; */ +/* > if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=f2cmin(n,j+kb). */ +/* > */ +/* > On exit, the factor S from the split Cholesky factorization */ +/* > B = S**H*S, as returned by CPBSTF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDBB */ +/* > \verbatim */ +/* > LDBB is INTEGER */ +/* > The leading dimension of the array BB. LDBB >= KB+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX array, dimension (LDZ, N) */ +/* > If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */ +/* > eigenvectors, with the i-th column of Z holding the */ +/* > eigenvector associated with W(i). The eigenvectors are */ +/* > normalized so that Z**H*B*Z = I. */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO=0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If N <= 1, LWORK >= 1. */ +/* > If JOBZ = 'N' and N > 1, LWORK >= N. */ +/* > If JOBZ = 'V' and N > 1, LWORK >= 2*N**2. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (MAX(1,LRWORK)) */ +/* > On exit, if INFO=0, RWORK(1) returns the optimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The dimension of array RWORK. */ +/* > If N <= 1, LRWORK >= 1. */ +/* > If JOBZ = 'N' and N > 1, LRWORK >= N. */ +/* > If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO=0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of array IWORK. */ +/* > If JOBZ = 'N' or N <= 1, LIWORK >= 1. */ +/* > If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is: */ +/* > <= N: the algorithm failed to converge: */ +/* > i off-diagonal elements of an intermediate */ +/* > tridiagonal form did not converge to zero; */ +/* > > N: if INFO = N + i, for 1 <= i <= N, then CPBSTF */ +/* > returned INFO = i: B is not positive definite. */ +/* > The factorization of B could not be completed and */ +/* > no eigenvalues or eigenvectors were computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complexOTHEReigen */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ + +/* ===================================================================== */ +/* Subroutine */ int chbgvd_(char *jobz, char *uplo, integer *n, integer *ka, + integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, + real *w, complex *z__, integer *ldz, complex *work, integer *lwork, + real *rwork, integer *lrwork, integer *iwork, integer *liwork, + integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, bb_dim1, bb_offset, z_dim1, z_offset, i__1; + + /* Local variables */ + integer inde; + char vect[1]; + integer llwk2; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + extern logical lsame_(char *, char *); + integer iinfo, lwmin; + logical upper; + integer llrwk; + logical wantz; + integer indwk2; + extern /* Subroutine */ int cstedc_(char *, integer *, real *, real *, + complex *, integer *, complex *, integer *, real *, integer *, + integer *, integer *, integer *), chbtrd_(char *, char *, + integer *, integer *, complex *, integer *, real *, real *, + complex *, integer *, complex *, integer *), + chbgst_(char *, char *, integer *, integer *, integer *, complex * + , integer *, complex *, integer *, complex *, integer *, complex * + , real *, integer *), clacpy_(char *, integer *, + integer *, complex *, integer *, complex *, integer *), + xerbla_(char *, integer *, ftnlen), cpbstf_(char *, integer *, + integer *, complex *, integer *, integer *); + integer indwrk, liwmin; + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); + integer lrwmin; + logical lquery; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + bb_dim1 = *ldbb; + bb_offset = 1 + bb_dim1 * 1; + bb -= bb_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + *info = 0; + if (*n <= 1) { + lwmin = *n + 1; + lrwmin = *n + 1; + liwmin = 1; + } else if (wantz) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = i__1 * i__1 << 1; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { + lwmin = *n; + lrwmin = *n; + liwmin = 1; + } + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (upper || lsame_(uplo, "L"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ka < 0) { + *info = -4; + } else if (*kb < 0 || *kb > *ka) { + *info = -5; + } else if (*ldab < *ka + 1) { + *info = -7; + } else if (*ldbb < *kb + 1) { + *info = -9; + } else if (*ldz < 1 || wantz && *ldz < *n) { + *info = -12; + } + + if (*info == 0) { + work[1].r = (real) lwmin, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -14; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -16; + } else if (*liwork < liwmin && ! lquery) { + *info = -18; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHBGVD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a split Cholesky factorization of B. */ + + cpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem. */ + + inde = 1; + indwrk = inde + *n; + indwk2 = *n * *n + 1; + llwk2 = *lwork - indwk2 + 2; + llrwk = *lrwork - indwrk + 2; + chbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb, + &z__[z_offset], ldz, &work[1], &rwork[1], &iinfo); + +/* Reduce Hermitian band matrix to tridiagonal form. */ + + if (wantz) { + *(unsigned char *)vect = 'U'; + } else { + *(unsigned char *)vect = 'N'; + } + chbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &w[1], &rwork[inde], & + z__[z_offset], ldz, &work[1], &iinfo); + +/* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEDC. */ + + if (! wantz) { + ssterf_(n, &w[1], &rwork[inde], info); + } else { + cstedc_("I", n, &w[1], &rwork[inde], &work[1], n, &work[indwk2], & + llwk2, &rwork[indwrk], &llrwk, &iwork[1], liwork, info); + cgemm_("N", "N", n, n, n, &c_b1, &z__[z_offset], ldz, &work[1], n, & + c_b2, &work[indwk2], n); + clacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz); + } + + work[1].r = (real) lwmin, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + return 0; + +/* End of CHBGVD */ + +} /* chbgvd_ */ + diff --git a/lapack-netlib/SRC/chbgvx.c b/lapack-netlib/SRC/chbgvx.c new file mode 100644 index 000000000..8a1b14851 --- /dev/null +++ b/lapack-netlib/SRC/chbgvx.c @@ -0,0 +1,976 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHBGVX */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHBGVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB, */ +/* LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, */ +/* LDZ, WORK, RWORK, IWORK, IFAIL, INFO ) */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M, */ +/* $ N */ +/* REAL ABSTOL, VL, VU */ +/* INTEGER IFAIL( * ), IWORK( * ) */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ), */ +/* $ WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHBGVX computes all the eigenvalues, and optionally, the eigenvectors */ +/* > of a complex generalized Hermitian-definite banded eigenproblem, of */ +/* > the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian */ +/* > and banded, and B is also positive definite. Eigenvalues and */ +/* > eigenvectors can be selected by specifying either all eigenvalues, */ +/* > a range of values or a range of indices for the desired eigenvalues. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all eigenvalues will be found; */ +/* > = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* > will be found; */ +/* > = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangles of A and B are stored; */ +/* > = 'L': Lower triangles of A and B are stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KA */ +/* > \verbatim */ +/* > KA is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KA >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KB */ +/* > \verbatim */ +/* > KB is INTEGER */ +/* > The number of superdiagonals of the matrix B if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KB >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX array, dimension (LDAB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first ka+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for f2cmax(1,j-ka)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+ka). */ +/* > */ +/* > On exit, the contents of AB are destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KA+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] BB */ +/* > \verbatim */ +/* > BB is COMPLEX array, dimension (LDBB, N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix B, stored in the first kb+1 rows of the array. The */ +/* > j-th column of B is stored in the j-th column of the array BB */ +/* > as follows: */ +/* > if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for f2cmax(1,j-kb)<=i<=j; */ +/* > if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=f2cmin(n,j+kb). */ +/* > */ +/* > On exit, the factor S from the split Cholesky factorization */ +/* > B = S**H*S, as returned by CPBSTF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDBB */ +/* > \verbatim */ +/* > LDBB is INTEGER */ +/* > The leading dimension of the array BB. LDBB >= KB+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX array, dimension (LDQ, N) */ +/* > If JOBZ = 'V', the n-by-n matrix used in the reduction of */ +/* > A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x, */ +/* > and consequently C to tridiagonal form. */ +/* > If JOBZ = 'N', the array Q is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. If JOBZ = 'N', */ +/* > LDQ >= 1. If JOBZ = 'V', LDQ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is REAL */ +/* > */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is REAL */ +/* > */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > */ +/* > If RANGE='I', the index of the */ +/* > smallest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > */ +/* > If RANGE='I', the index of the */ +/* > largest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ABSTOL */ +/* > \verbatim */ +/* > ABSTOL is REAL */ +/* > The absolute error tolerance for the eigenvalues. */ +/* > An approximate eigenvalue is accepted as converged */ +/* > when it is determined to lie in an interval [a,b] */ +/* > of width less than or equal to */ +/* > */ +/* > ABSTOL + EPS * f2cmax( |a|,|b| ) , */ +/* > */ +/* > where EPS is the machine precision. If ABSTOL is less than */ +/* > or equal to zero, then EPS*|T| will be used in its place, */ +/* > where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* > by reducing AP to tridiagonal form. */ +/* > */ +/* > Eigenvalues will be computed most accurately when ABSTOL is */ +/* > set to twice the underflow threshold 2*SLAMCH('S'), not zero. */ +/* > If this routine returns with INFO>0, indicating that some */ +/* > eigenvectors did not converge, try setting ABSTOL to */ +/* > 2*SLAMCH('S'). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The total number of eigenvalues found. 0 <= M <= N. */ +/* > If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX array, dimension (LDZ, N) */ +/* > If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */ +/* > eigenvectors, with the i-th column of Z holding the */ +/* > eigenvector associated with W(i). The eigenvectors are */ +/* > normalized so that Z**H*B*Z = I. */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (7*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (5*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IFAIL */ +/* > \verbatim */ +/* > IFAIL is INTEGER array, dimension (N) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* > IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* > indices of the eigenvectors that failed to converge. */ +/* > If JOBZ = 'N', then IFAIL is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is: */ +/* > <= N: then i eigenvectors failed to converge. Their */ +/* > indices are stored in array IFAIL. */ +/* > > N: if INFO = N + i, for 1 <= i <= N, then CPBSTF */ +/* > returned INFO = i: B is not positive definite. */ +/* > The factorization of B could not be completed and */ +/* > no eigenvalues or eigenvectors were computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complexOTHEReigen */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ + +/* ===================================================================== */ +/* Subroutine */ int chbgvx_(char *jobz, char *range, char *uplo, integer *n, + integer *ka, integer *kb, complex *ab, integer *ldab, complex *bb, + integer *ldbb, complex *q, integer *ldq, real *vl, real *vu, integer * + il, integer *iu, real *abstol, integer *m, real *w, complex *z__, + integer *ldz, complex *work, real *rwork, integer *iwork, integer * + ifail, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, bb_dim1, bb_offset, q_dim1, q_offset, z_dim1, + z_offset, i__1, i__2; + + /* Local variables */ + integer indd, inde; + char vect[1]; + logical test; + integer itmp1, i__, j, indee; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + integer iinfo; + char order[1]; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *), cswap_(integer *, complex *, integer *, + complex *, integer *); + logical upper; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + logical wantz; + integer jj; + logical alleig, indeig; + integer indibl; + extern /* Subroutine */ int chbtrd_(char *, char *, integer *, integer *, + complex *, integer *, real *, real *, complex *, integer *, + complex *, integer *); + logical valeig; + extern /* Subroutine */ int chbgst_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *, complex *, real *, integer *), clacpy_( + char *, integer *, integer *, complex *, integer *, complex *, + integer *), xerbla_(char *, integer *, ftnlen), cpbstf_( + char *, integer *, integer *, complex *, integer *, integer *); + integer indiwk, indisp; + extern /* Subroutine */ int cstein_(integer *, real *, real *, integer *, + real *, integer *, integer *, complex *, integer *, real *, + integer *, integer *, integer *); + integer indrwk, indwrk; + extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, + complex *, integer *, real *, integer *), ssterf_(integer + *, real *, real *, integer *); + integer nsplit; + extern /* Subroutine */ int sstebz_(char *, char *, integer *, real *, + real *, integer *, integer *, real *, real *, real *, integer *, + integer *, real *, integer *, integer *, real *, integer *, + integer *); + real tmp1; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + bb_dim1 = *ldbb; + bb_offset = 1 + bb_dim1 * 1; + bb -= bb_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + --ifail; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + upper = lsame_(uplo, "U"); + alleig = lsame_(range, "A"); + valeig = lsame_(range, "V"); + indeig = lsame_(range, "I"); + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (upper || lsame_(uplo, "L"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ka < 0) { + *info = -5; + } else if (*kb < 0 || *kb > *ka) { + *info = -6; + } else if (*ldab < *ka + 1) { + *info = -8; + } else if (*ldbb < *kb + 1) { + *info = -10; + } else if (*ldq < 1 || wantz && *ldq < *n) { + *info = -12; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -14; + } + } else if (indeig) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -15; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -16; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -21; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHBGVX", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + +/* Form a split Cholesky factorization of B. */ + + cpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem. */ + + chbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb, + &q[q_offset], ldq, &work[1], &rwork[1], &iinfo); + +/* Solve the standard eigenvalue problem. */ +/* Reduce Hermitian band matrix to tridiagonal form. */ + + indd = 1; + inde = indd + *n; + indrwk = inde + *n; + indwrk = 1; + if (wantz) { + *(unsigned char *)vect = 'U'; + } else { + *(unsigned char *)vect = 'N'; + } + chbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &rwork[indd], &rwork[ + inde], &q[q_offset], ldq, &work[indwrk], &iinfo); + +/* If all eigenvalues are desired and ABSTOL is less than or equal */ +/* to zero, then call SSTERF or CSTEQR. If this fails for some */ +/* eigenvalue, then try SSTEBZ. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && *abstol <= 0.f) { + scopy_(n, &rwork[indd], &c__1, &w[1], &c__1); + indee = indrwk + (*n << 1); + i__1 = *n - 1; + scopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + if (! wantz) { + ssterf_(n, &w[1], &rwork[indee], info); + } else { + clacpy_("A", n, n, &q[q_offset], ldq, &z__[z_offset], ldz); + csteqr_(jobz, n, &w[1], &rwork[indee], &z__[z_offset], ldz, & + rwork[indrwk], info); + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ifail[i__] = 0; +/* L10: */ + } + } + } + if (*info == 0) { + *m = *n; + goto L30; + } + *info = 0; + } + +/* Otherwise, call SSTEBZ and, if eigenvectors are desired, */ +/* call CSTEIN. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + indibl = 1; + indisp = indibl + *n; + indiwk = indisp + *n; + sstebz_(range, order, n, vl, vu, il, iu, abstol, &rwork[indd], &rwork[ + inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[ + indrwk], &iwork[indiwk], info); + + if (wantz) { + cstein_(n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], & + iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[ + indiwk], &ifail[1], info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by CSTEIN. */ + + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + ccopy_(n, &z__[j * z_dim1 + 1], &c__1, &work[1], &c__1); + cgemv_("N", n, n, &c_b2, &q[q_offset], ldq, &work[1], &c__1, & + c_b1, &z__[j * z_dim1 + 1], &c__1); +/* L20: */ + } + } + +L30: + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L40: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + if (*info != 0) { + itmp1 = ifail[i__]; + ifail[i__] = ifail[j]; + ifail[j] = itmp1; + } + } +/* L50: */ + } + } + + return 0; + +/* End of CHBGVX */ + +} /* chbgvx_ */ + diff --git a/lapack-netlib/SRC/chbtrd.c b/lapack-netlib/SRC/chbtrd.c new file mode 100644 index 000000000..c5bf16098 --- /dev/null +++ b/lapack-netlib/SRC/chbtrd.c @@ -0,0 +1,1251 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHBTRD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHBTRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ, */ +/* WORK, INFO ) */ + +/* CHARACTER UPLO, VECT */ +/* INTEGER INFO, KD, LDAB, LDQ, N */ +/* REAL D( * ), E( * ) */ +/* COMPLEX AB( LDAB, * ), Q( LDQ, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHBTRD reduces a complex Hermitian band matrix A to real symmetric */ +/* > tridiagonal form T by a unitary similarity transformation: */ +/* > Q**H * A * Q = T. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] VECT */ +/* > \verbatim */ +/* > VECT is CHARACTER*1 */ +/* > = 'N': do not form Q; */ +/* > = 'V': form Q; */ +/* > = 'U': update a matrix X, by forming X*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX array, dimension (LDAB,N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > On exit, the diagonal elements of AB are overwritten by the */ +/* > diagonal elements of the tridiagonal matrix T; if KD > 0, the */ +/* > elements on the first superdiagonal (if UPLO = 'U') or the */ +/* > first subdiagonal (if UPLO = 'L') are overwritten by the */ +/* > off-diagonal elements of T; the rest of AB is overwritten by */ +/* > values generated during the reduction. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (N) */ +/* > The diagonal elements of the tridiagonal matrix T. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is REAL array, dimension (N-1) */ +/* > The off-diagonal elements of the tridiagonal matrix T: */ +/* > E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX array, dimension (LDQ,N) */ +/* > On entry, if VECT = 'U', then Q must contain an N-by-N */ +/* > matrix X; if VECT = 'N' or 'V', then Q need not be set. */ +/* > */ +/* > On exit: */ +/* > if VECT = 'V', Q contains the N-by-N unitary matrix Q; */ +/* > if VECT = 'U', Q contains the product X*Q; */ +/* > if VECT = 'N', the array Q is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. */ +/* > LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Modified by Linda Kaufman, Bell Labs. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int chbtrd_(char *vect, char *uplo, integer *n, integer *kd, + complex *ab, integer *ldab, real *d__, real *e, complex *q, integer * + ldq, complex *work, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, q_dim1, q_offset, i__1, i__2, i__3, i__4, + i__5, i__6; + real r__1; + complex q__1; + + /* Local variables */ + integer inca, jend, lend, jinc; + real abst; + integer incx, last; + complex temp; + extern /* Subroutine */ int crot_(integer *, complex *, integer *, + complex *, integer *, real *, complex *); + integer j1end, j1inc, i__, j, k, l; + complex t; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *); + integer iqend; + extern logical lsame_(char *, char *); + logical initq, wantq, upper; + integer i2, j1, j2; + extern /* Subroutine */ int clar2v_(integer *, complex *, complex *, + complex *, integer *, real *, complex *, integer *); + integer nq, nr, iqaend; + extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), + claset_(char *, integer *, integer *, complex *, complex *, + complex *, integer *), clartg_(complex *, complex *, real + *, complex *, complex *), xerbla_(char *, integer *, ftnlen), + clargv_(integer *, complex *, integer *, complex *, integer *, + real *, integer *), clartv_(integer *, complex *, integer *, + complex *, integer *, real *, complex *, integer *); + integer kd1, ibl, iqb, kdn, jin, nrt, kdm1; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + --work; + + /* Function Body */ + initq = lsame_(vect, "V"); + wantq = initq || lsame_(vect, "U"); + upper = lsame_(uplo, "U"); + kd1 = *kd + 1; + kdm1 = *kd - 1; + incx = *ldab - 1; + iqend = 1; + + *info = 0; + if (! wantq && ! lsame_(vect, "N")) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*kd < 0) { + *info = -4; + } else if (*ldab < kd1) { + *info = -6; + } else if (*ldq < f2cmax(1,*n) && wantq) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHBTRD", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Initialize Q to the unit matrix, if needed */ + + if (initq) { + claset_("Full", n, n, &c_b1, &c_b2, &q[q_offset], ldq); + } + +/* Wherever possible, plane rotations are generated and applied in */ +/* vector operations of length NR over the index set J1:J2:KD1. */ + +/* The real cosines and complex sines of the plane rotations are */ +/* stored in the arrays D and WORK. */ + + inca = kd1 * *ldab; +/* Computing MIN */ + i__1 = *n - 1; + kdn = f2cmin(i__1,*kd); + if (upper) { + + if (*kd > 1) { + +/* Reduce to complex Hermitian tridiagonal form, working with */ +/* the upper triangle */ + + nr = 0; + j1 = kdn + 2; + j2 = 1; + + i__1 = kd1 + ab_dim1; + i__2 = kd1 + ab_dim1; + r__1 = ab[i__2].r; + ab[i__1].r = r__1, ab[i__1].i = 0.f; + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Reduce i-th row of matrix to tridiagonal form */ + + for (k = kdn + 1; k >= 2; --k) { + j1 += kdn; + j2 += kdn; + + if (nr > 0) { + +/* generate plane rotations to annihilate nonzero */ +/* elements which have been created outside the band */ + + clargv_(&nr, &ab[(j1 - 1) * ab_dim1 + 1], &inca, & + work[j1], &kd1, &d__[j1], &kd1); + +/* apply rotations from the right */ + + +/* Dependent on the the number of diagonals either */ +/* CLARTV or CROT is used */ + + if (nr >= (*kd << 1) - 1) { + i__2 = *kd - 1; + for (l = 1; l <= i__2; ++l) { + clartv_(&nr, &ab[l + 1 + (j1 - 1) * ab_dim1], + &inca, &ab[l + j1 * ab_dim1], &inca, & + d__[j1], &work[j1], &kd1); +/* L10: */ + } + + } else { + jend = j1 + (nr - 1) * kd1; + i__2 = jend; + i__3 = kd1; + for (jinc = j1; i__3 < 0 ? jinc >= i__2 : jinc <= + i__2; jinc += i__3) { + crot_(&kdm1, &ab[(jinc - 1) * ab_dim1 + 2], & + c__1, &ab[jinc * ab_dim1 + 1], &c__1, + &d__[jinc], &work[jinc]); +/* L20: */ + } + } + } + + + if (k > 2) { + if (k <= *n - i__ + 1) { + +/* generate plane rotation to annihilate a(i,i+k-1) */ +/* within the band */ + + clartg_(&ab[*kd - k + 3 + (i__ + k - 2) * ab_dim1] + , &ab[*kd - k + 2 + (i__ + k - 1) * + ab_dim1], &d__[i__ + k - 1], &work[i__ + + k - 1], &temp); + i__3 = *kd - k + 3 + (i__ + k - 2) * ab_dim1; + ab[i__3].r = temp.r, ab[i__3].i = temp.i; + +/* apply rotation from the right */ + + i__3 = k - 3; + crot_(&i__3, &ab[*kd - k + 4 + (i__ + k - 2) * + ab_dim1], &c__1, &ab[*kd - k + 3 + (i__ + + k - 1) * ab_dim1], &c__1, &d__[i__ + k - + 1], &work[i__ + k - 1]); + } + ++nr; + j1 = j1 - kdn - 1; + } + +/* apply plane rotations from both sides to diagonal */ +/* blocks */ + + if (nr > 0) { + clar2v_(&nr, &ab[kd1 + (j1 - 1) * ab_dim1], &ab[kd1 + + j1 * ab_dim1], &ab[*kd + j1 * ab_dim1], &inca, + &d__[j1], &work[j1], &kd1); + } + +/* apply plane rotations from the left */ + + if (nr > 0) { + clacgv_(&nr, &work[j1], &kd1); + if ((*kd << 1) - 1 < nr) { + +/* Dependent on the the number of diagonals either */ +/* CLARTV or CROT is used */ + + i__3 = *kd - 1; + for (l = 1; l <= i__3; ++l) { + if (j2 + l > *n) { + nrt = nr - 1; + } else { + nrt = nr; + } + if (nrt > 0) { + clartv_(&nrt, &ab[*kd - l + (j1 + l) * + ab_dim1], &inca, &ab[*kd - l + 1 + + (j1 + l) * ab_dim1], &inca, & + d__[j1], &work[j1], &kd1); + } +/* L30: */ + } + } else { + j1end = j1 + kd1 * (nr - 2); + if (j1end >= j1) { + i__3 = j1end; + i__2 = kd1; + for (jin = j1; i__2 < 0 ? jin >= i__3 : jin <= + i__3; jin += i__2) { + i__4 = *kd - 1; + crot_(&i__4, &ab[*kd - 1 + (jin + 1) * + ab_dim1], &incx, &ab[*kd + (jin + + 1) * ab_dim1], &incx, &d__[jin], & + work[jin]); +/* L40: */ + } + } +/* Computing MIN */ + i__2 = kdm1, i__3 = *n - j2; + lend = f2cmin(i__2,i__3); + last = j1end + kd1; + if (lend > 0) { + crot_(&lend, &ab[*kd - 1 + (last + 1) * + ab_dim1], &incx, &ab[*kd + (last + 1) + * ab_dim1], &incx, &d__[last], &work[ + last]); + } + } + } + + if (wantq) { + +/* accumulate product of plane rotations in Q */ + + if (initq) { + +/* take advantage of the fact that Q was */ +/* initially the Identity matrix */ + + iqend = f2cmax(iqend,j2); +/* Computing MAX */ + i__2 = 0, i__3 = k - 3; + i2 = f2cmax(i__2,i__3); + iqaend = i__ * *kd + 1; + if (k == 2) { + iqaend += *kd; + } + iqaend = f2cmin(iqaend,iqend); + i__2 = j2; + i__3 = kd1; + for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j + += i__3) { + ibl = i__ - i2 / kdm1; + ++i2; +/* Computing MAX */ + i__4 = 1, i__5 = j - ibl; + iqb = f2cmax(i__4,i__5); + nq = iqaend + 1 - iqb; +/* Computing MIN */ + i__4 = iqaend + *kd; + iqaend = f2cmin(i__4,iqend); + r_cnjg(&q__1, &work[j]); + crot_(&nq, &q[iqb + (j - 1) * q_dim1], &c__1, + &q[iqb + j * q_dim1], &c__1, &d__[j], + &q__1); +/* L50: */ + } + } else { + + i__3 = j2; + i__2 = kd1; + for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j + += i__2) { + r_cnjg(&q__1, &work[j]); + crot_(n, &q[(j - 1) * q_dim1 + 1], &c__1, &q[ + j * q_dim1 + 1], &c__1, &d__[j], & + q__1); +/* L60: */ + } + } + + } + + if (j2 + kdn > *n) { + +/* adjust J2 to keep within the bounds of the matrix */ + + --nr; + j2 = j2 - kdn - 1; + } + + i__2 = j2; + i__3 = kd1; + for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j += i__3) + { + +/* create nonzero element a(j-1,j+kd) outside the band */ +/* and store it in WORK */ + + i__4 = j + *kd; + i__5 = j; + i__6 = (j + *kd) * ab_dim1 + 1; + q__1.r = work[i__5].r * ab[i__6].r - work[i__5].i * + ab[i__6].i, q__1.i = work[i__5].r * ab[i__6] + .i + work[i__5].i * ab[i__6].r; + work[i__4].r = q__1.r, work[i__4].i = q__1.i; + i__4 = (j + *kd) * ab_dim1 + 1; + i__5 = j; + i__6 = (j + *kd) * ab_dim1 + 1; + q__1.r = d__[i__5] * ab[i__6].r, q__1.i = d__[i__5] * + ab[i__6].i; + ab[i__4].r = q__1.r, ab[i__4].i = q__1.i; +/* L70: */ + } +/* L80: */ + } +/* L90: */ + } + } + + if (*kd > 0) { + +/* make off-diagonal elements real and copy them to E */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__3 = *kd + (i__ + 1) * ab_dim1; + t.r = ab[i__3].r, t.i = ab[i__3].i; + abst = c_abs(&t); + i__3 = *kd + (i__ + 1) * ab_dim1; + ab[i__3].r = abst, ab[i__3].i = 0.f; + e[i__] = abst; + if (abst != 0.f) { + q__1.r = t.r / abst, q__1.i = t.i / abst; + t.r = q__1.r, t.i = q__1.i; + } else { + t.r = 1.f, t.i = 0.f; + } + if (i__ < *n - 1) { + i__3 = *kd + (i__ + 2) * ab_dim1; + i__2 = *kd + (i__ + 2) * ab_dim1; + q__1.r = ab[i__2].r * t.r - ab[i__2].i * t.i, q__1.i = ab[ + i__2].r * t.i + ab[i__2].i * t.r; + ab[i__3].r = q__1.r, ab[i__3].i = q__1.i; + } + if (wantq) { + r_cnjg(&q__1, &t); + cscal_(n, &q__1, &q[(i__ + 1) * q_dim1 + 1], &c__1); + } +/* L100: */ + } + } else { + +/* set E to zero if original matrix was diagonal */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + e[i__] = 0.f; +/* L110: */ + } + } + +/* copy diagonal elements to D */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__3 = i__; + i__2 = kd1 + i__ * ab_dim1; + d__[i__3] = ab[i__2].r; +/* L120: */ + } + + } else { + + if (*kd > 1) { + +/* Reduce to complex Hermitian tridiagonal form, working with */ +/* the lower triangle */ + + nr = 0; + j1 = kdn + 2; + j2 = 1; + + i__1 = ab_dim1 + 1; + i__3 = ab_dim1 + 1; + r__1 = ab[i__3].r; + ab[i__1].r = r__1, ab[i__1].i = 0.f; + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Reduce i-th column of matrix to tridiagonal form */ + + for (k = kdn + 1; k >= 2; --k) { + j1 += kdn; + j2 += kdn; + + if (nr > 0) { + +/* generate plane rotations to annihilate nonzero */ +/* elements which have been created outside the band */ + + clargv_(&nr, &ab[kd1 + (j1 - kd1) * ab_dim1], &inca, & + work[j1], &kd1, &d__[j1], &kd1); + +/* apply plane rotations from one side */ + + +/* Dependent on the the number of diagonals either */ +/* CLARTV or CROT is used */ + + if (nr > (*kd << 1) - 1) { + i__3 = *kd - 1; + for (l = 1; l <= i__3; ++l) { + clartv_(&nr, &ab[kd1 - l + (j1 - kd1 + l) * + ab_dim1], &inca, &ab[kd1 - l + 1 + ( + j1 - kd1 + l) * ab_dim1], &inca, &d__[ + j1], &work[j1], &kd1); +/* L130: */ + } + } else { + jend = j1 + kd1 * (nr - 1); + i__3 = jend; + i__2 = kd1; + for (jinc = j1; i__2 < 0 ? jinc >= i__3 : jinc <= + i__3; jinc += i__2) { + crot_(&kdm1, &ab[*kd + (jinc - *kd) * ab_dim1] + , &incx, &ab[kd1 + (jinc - *kd) * + ab_dim1], &incx, &d__[jinc], &work[ + jinc]); +/* L140: */ + } + } + + } + + if (k > 2) { + if (k <= *n - i__ + 1) { + +/* generate plane rotation to annihilate a(i+k-1,i) */ +/* within the band */ + + clartg_(&ab[k - 1 + i__ * ab_dim1], &ab[k + i__ * + ab_dim1], &d__[i__ + k - 1], &work[i__ + + k - 1], &temp); + i__2 = k - 1 + i__ * ab_dim1; + ab[i__2].r = temp.r, ab[i__2].i = temp.i; + +/* apply rotation from the left */ + + i__2 = k - 3; + i__3 = *ldab - 1; + i__4 = *ldab - 1; + crot_(&i__2, &ab[k - 2 + (i__ + 1) * ab_dim1], & + i__3, &ab[k - 1 + (i__ + 1) * ab_dim1], & + i__4, &d__[i__ + k - 1], &work[i__ + k - + 1]); + } + ++nr; + j1 = j1 - kdn - 1; + } + +/* apply plane rotations from both sides to diagonal */ +/* blocks */ + + if (nr > 0) { + clar2v_(&nr, &ab[(j1 - 1) * ab_dim1 + 1], &ab[j1 * + ab_dim1 + 1], &ab[(j1 - 1) * ab_dim1 + 2], & + inca, &d__[j1], &work[j1], &kd1); + } + +/* apply plane rotations from the right */ + + +/* Dependent on the the number of diagonals either */ +/* CLARTV or CROT is used */ + + if (nr > 0) { + clacgv_(&nr, &work[j1], &kd1); + if (nr > (*kd << 1) - 1) { + i__2 = *kd - 1; + for (l = 1; l <= i__2; ++l) { + if (j2 + l > *n) { + nrt = nr - 1; + } else { + nrt = nr; + } + if (nrt > 0) { + clartv_(&nrt, &ab[l + 2 + (j1 - 1) * + ab_dim1], &inca, &ab[l + 1 + j1 * + ab_dim1], &inca, &d__[j1], &work[ + j1], &kd1); + } +/* L150: */ + } + } else { + j1end = j1 + kd1 * (nr - 2); + if (j1end >= j1) { + i__2 = j1end; + i__3 = kd1; + for (j1inc = j1; i__3 < 0 ? j1inc >= i__2 : + j1inc <= i__2; j1inc += i__3) { + crot_(&kdm1, &ab[(j1inc - 1) * ab_dim1 + + 3], &c__1, &ab[j1inc * ab_dim1 + + 2], &c__1, &d__[j1inc], &work[ + j1inc]); +/* L160: */ + } + } +/* Computing MIN */ + i__3 = kdm1, i__2 = *n - j2; + lend = f2cmin(i__3,i__2); + last = j1end + kd1; + if (lend > 0) { + crot_(&lend, &ab[(last - 1) * ab_dim1 + 3], & + c__1, &ab[last * ab_dim1 + 2], &c__1, + &d__[last], &work[last]); + } + } + } + + + + if (wantq) { + +/* accumulate product of plane rotations in Q */ + + if (initq) { + +/* take advantage of the fact that Q was */ +/* initially the Identity matrix */ + + iqend = f2cmax(iqend,j2); +/* Computing MAX */ + i__3 = 0, i__2 = k - 3; + i2 = f2cmax(i__3,i__2); + iqaend = i__ * *kd + 1; + if (k == 2) { + iqaend += *kd; + } + iqaend = f2cmin(iqaend,iqend); + i__3 = j2; + i__2 = kd1; + for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j + += i__2) { + ibl = i__ - i2 / kdm1; + ++i2; +/* Computing MAX */ + i__4 = 1, i__5 = j - ibl; + iqb = f2cmax(i__4,i__5); + nq = iqaend + 1 - iqb; +/* Computing MIN */ + i__4 = iqaend + *kd; + iqaend = f2cmin(i__4,iqend); + crot_(&nq, &q[iqb + (j - 1) * q_dim1], &c__1, + &q[iqb + j * q_dim1], &c__1, &d__[j], + &work[j]); +/* L170: */ + } + } else { + + i__2 = j2; + i__3 = kd1; + for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j + += i__3) { + crot_(n, &q[(j - 1) * q_dim1 + 1], &c__1, &q[ + j * q_dim1 + 1], &c__1, &d__[j], & + work[j]); +/* L180: */ + } + } + } + + if (j2 + kdn > *n) { + +/* adjust J2 to keep within the bounds of the matrix */ + + --nr; + j2 = j2 - kdn - 1; + } + + i__3 = j2; + i__2 = kd1; + for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j += i__2) + { + +/* create nonzero element a(j+kd,j-1) outside the */ +/* band and store it in WORK */ + + i__4 = j + *kd; + i__5 = j; + i__6 = kd1 + j * ab_dim1; + q__1.r = work[i__5].r * ab[i__6].r - work[i__5].i * + ab[i__6].i, q__1.i = work[i__5].r * ab[i__6] + .i + work[i__5].i * ab[i__6].r; + work[i__4].r = q__1.r, work[i__4].i = q__1.i; + i__4 = kd1 + j * ab_dim1; + i__5 = j; + i__6 = kd1 + j * ab_dim1; + q__1.r = d__[i__5] * ab[i__6].r, q__1.i = d__[i__5] * + ab[i__6].i; + ab[i__4].r = q__1.r, ab[i__4].i = q__1.i; +/* L190: */ + } +/* L200: */ + } +/* L210: */ + } + } + + if (*kd > 0) { + +/* make off-diagonal elements real and copy them to E */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ * ab_dim1 + 2; + t.r = ab[i__2].r, t.i = ab[i__2].i; + abst = c_abs(&t); + i__2 = i__ * ab_dim1 + 2; + ab[i__2].r = abst, ab[i__2].i = 0.f; + e[i__] = abst; + if (abst != 0.f) { + q__1.r = t.r / abst, q__1.i = t.i / abst; + t.r = q__1.r, t.i = q__1.i; + } else { + t.r = 1.f, t.i = 0.f; + } + if (i__ < *n - 1) { + i__2 = (i__ + 1) * ab_dim1 + 2; + i__3 = (i__ + 1) * ab_dim1 + 2; + q__1.r = ab[i__3].r * t.r - ab[i__3].i * t.i, q__1.i = ab[ + i__3].r * t.i + ab[i__3].i * t.r; + ab[i__2].r = q__1.r, ab[i__2].i = q__1.i; + } + if (wantq) { + cscal_(n, &t, &q[(i__ + 1) * q_dim1 + 1], &c__1); + } +/* L220: */ + } + } else { + +/* set E to zero if original matrix was diagonal */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + e[i__] = 0.f; +/* L230: */ + } + } + +/* copy diagonal elements to D */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__ * ab_dim1 + 1; + d__[i__2] = ab[i__3].r; +/* L240: */ + } + } + + return 0; + +/* End of CHBTRD */ + +} /* chbtrd_ */ + diff --git a/lapack-netlib/SRC/checon.c b/lapack-netlib/SRC/checon.c new file mode 100644 index 000000000..b061f5911 --- /dev/null +++ b/lapack-netlib/SRC/checon.c @@ -0,0 +1,633 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHECON */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHECON + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHECON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, */ +/* INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* REAL ANORM, RCOND */ +/* INTEGER IPIV( * ) */ +/* COMPLEX A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHECON estimates the reciprocal of the condition number of a complex */ +/* > Hermitian matrix A using the factorization A = U*D*U**H or */ +/* > A = L*D*L**H computed by CHETRF. */ +/* > */ +/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the details of the factorization are stored */ +/* > as an upper or lower triangular matrix. */ +/* > = 'U': Upper triangular, form is A = U*D*U**H; */ +/* > = 'L': Lower triangular, form is A = L*D*L**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > The block diagonal matrix D and the multipliers used to */ +/* > obtain the factor U or L as computed by CHETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D */ +/* > as determined by CHETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ANORM */ +/* > \verbatim */ +/* > ANORM is REAL */ +/* > The 1-norm of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > The reciprocal of the condition number of the matrix A, */ +/* > computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ +/* > estimate of the 1-norm of inv(A) computed in this routine. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexHEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int checon_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, real *anorm, real *rcond, complex *work, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer kase, i__; + extern logical lsame_(char *, char *); + integer isave[3]; + logical upper; + extern /* Subroutine */ int clacn2_(integer *, complex *, complex *, real + *, integer *, integer *), xerbla_(char *, integer *, ftnlen); + real ainvnm; + extern /* Subroutine */ int chetrs_(char *, integer *, integer *, complex + *, integer *, integer *, complex *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*anorm < 0.f) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHECON", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.f; + if (*n == 0) { + *rcond = 1.f; + return 0; + } else if (*anorm <= 0.f) { + return 0; + } + +/* Check that the diagonal matrix D is nonsingular. */ + + if (upper) { + +/* Upper triangular storage: examine D from bottom to top */ + + for (i__ = *n; i__ >= 1; --i__) { + i__1 = i__ + i__ * a_dim1; + if (ipiv[i__] > 0 && (a[i__1].r == 0.f && a[i__1].i == 0.f)) { + return 0; + } +/* L10: */ + } + } else { + +/* Lower triangular storage: examine D from top to bottom. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + if (ipiv[i__] > 0 && (a[i__2].r == 0.f && a[i__2].i == 0.f)) { + return 0; + } +/* L20: */ + } + } + +/* Estimate the 1-norm of the inverse. */ + + kase = 0; +L30: + clacn2_(n, &work[*n + 1], &work[1], &ainvnm, &kase, isave); + if (kase != 0) { + +/* Multiply by inv(L*D*L**H) or inv(U*D*U**H). */ + + chetrs_(uplo, n, &c__1, &a[a_offset], lda, &ipiv[1], &work[1], n, + info); + goto L30; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.f) { + *rcond = 1.f / ainvnm / *anorm; + } + + return 0; + +/* End of CHECON */ + +} /* checon_ */ + diff --git a/lapack-netlib/SRC/checon_3.c b/lapack-netlib/SRC/checon_3.c new file mode 100644 index 000000000..bc51072ea --- /dev/null +++ b/lapack-netlib/SRC/checon_3.c @@ -0,0 +1,674 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHECON_3 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHECON_3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHECON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, */ +/* WORK, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* REAL ANORM, RCOND */ +/* INTEGER IPIV( * ) */ +/* COMPLEX A( LDA, * ), E ( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > CHECON_3 estimates the reciprocal of the condition number (in the */ +/* > 1-norm) of a complex Hermitian matrix A using the factorization */ +/* > computed by CHETRF_RK or CHETRF_BK: */ +/* > */ +/* > A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), */ +/* > */ +/* > where U (or L) is unit upper (or lower) triangular matrix, */ +/* > U**H (or L**H) is the conjugate of U (or L), P is a permutation */ +/* > matrix, P**T is the transpose of P, and D is Hermitian and block */ +/* > diagonal with 1-by-1 and 2-by-2 diagonal blocks. */ +/* > */ +/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ +/* > This routine uses BLAS3 solver CHETRS_3. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the details of the factorization are */ +/* > stored as an upper or lower triangular matrix: */ +/* > = 'U': Upper triangular, form is A = P*U*D*(U**H)*(P**T); */ +/* > = 'L': Lower triangular, form is A = P*L*D*(L**H)*(P**T). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > Diagonal of the block diagonal matrix D and factors U or L */ +/* > as computed by CHETRF_RK and CHETRF_BK: */ +/* > a) ONLY diagonal elements of the Hermitian block diagonal */ +/* > matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); */ +/* > (superdiagonal (or subdiagonal) elements of D */ +/* > should be provided on entry in array E), and */ +/* > b) If UPLO = 'U': factor U in the superdiagonal part of A. */ +/* > If UPLO = 'L': factor L in the subdiagonal part of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] E */ +/* > \verbatim */ +/* > E is COMPLEX array, dimension (N) */ +/* > On entry, contains the superdiagonal (or subdiagonal) */ +/* > elements of the Hermitian block diagonal matrix D */ +/* > with 1-by-1 or 2-by-2 diagonal blocks, where */ +/* > If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; */ +/* > If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. */ +/* > */ +/* > NOTE: For 1-by-1 diagonal block D(k), where */ +/* > 1 <= k <= N, the element E(k) is not referenced in both */ +/* > UPLO = 'U' or UPLO = 'L' cases. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D */ +/* > as determined by CHETRF_RK or CHETRF_BK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ANORM */ +/* > \verbatim */ +/* > ANORM is REAL */ +/* > The 1-norm of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > The reciprocal of the condition number of the matrix A, */ +/* > computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ +/* > estimate of the 1-norm of inv(A) computed in this routine. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup complexHEcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > \verbatim */ +/* > */ +/* > June 2017, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > */ +/* > September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */ +/* > School of Mathematics, */ +/* > University of Manchester */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int checon_3_(char *uplo, integer *n, complex *a, integer * + lda, complex *e, integer *ipiv, real *anorm, real *rcond, complex * + work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer kase, i__; + extern logical lsame_(char *, char *); + integer isave[3]; + logical upper; + extern /* Subroutine */ int clacn2_(integer *, complex *, complex *, real + *, integer *, integer *), xerbla_(char *, integer *, ftnlen); + real ainvnm; + extern /* Subroutine */ int chetrs_3_(char *, integer *, integer *, + complex *, integer *, complex *, integer *, complex *, integer *, + integer *); + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --e; + --ipiv; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*anorm < 0.f) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHECON_3", &i__1, (ftnlen)8); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.f; + if (*n == 0) { + *rcond = 1.f; + return 0; + } else if (*anorm <= 0.f) { + return 0; + } + +/* Check that the diagonal matrix D is nonsingular. */ + + if (upper) { + +/* Upper triangular storage: examine D from bottom to top */ + + for (i__ = *n; i__ >= 1; --i__) { + i__1 = i__ + i__ * a_dim1; + if (ipiv[i__] > 0 && (a[i__1].r == 0.f && a[i__1].i == 0.f)) { + return 0; + } + } + } else { + +/* Lower triangular storage: examine D from top to bottom. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + if (ipiv[i__] > 0 && (a[i__2].r == 0.f && a[i__2].i == 0.f)) { + return 0; + } + } + } + +/* Estimate the 1-norm of the inverse. */ + + kase = 0; +L30: + clacn2_(n, &work[*n + 1], &work[1], &ainvnm, &kase, isave); + if (kase != 0) { + +/* Multiply by inv(L*D*L**H) or inv(U*D*U**H). */ + + chetrs_3_(uplo, n, &c__1, &a[a_offset], lda, &e[1], &ipiv[1], &work[ + 1], n, info); + goto L30; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.f) { + *rcond = 1.f / ainvnm / *anorm; + } + + return 0; + +/* End of CHECON_3 */ + +} /* checon_3__ */ + diff --git a/lapack-netlib/SRC/checon_rook.c b/lapack-netlib/SRC/checon_rook.c new file mode 100644 index 000000000..549f4ed63 --- /dev/null +++ b/lapack-netlib/SRC/checon_rook.c @@ -0,0 +1,648 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHECON_ROOK estimates the reciprocal of the condition number fort HE matrices using factorizat +ion obtained with one of the bounded diagonal pivoting methods (f2cmax 2 interchanges) */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHECON_ROOK + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHECON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, */ +/* INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* REAL ANORM, RCOND */ +/* INTEGER IPIV( * ) */ +/* COMPLEX A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHECON_ROOK estimates the reciprocal of the condition number of a complex */ +/* > Hermitian matrix A using the factorization A = U*D*U**H or */ +/* > A = L*D*L**H computed by CHETRF_ROOK. */ +/* > */ +/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the details of the factorization are stored */ +/* > as an upper or lower triangular matrix. */ +/* > = 'U': Upper triangular, form is A = U*D*U**H; */ +/* > = 'L': Lower triangular, form is A = L*D*L**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > The block diagonal matrix D and the multipliers used to */ +/* > obtain the factor U or L as computed by CHETRF_ROOK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D */ +/* > as determined by CHETRF_ROOK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ANORM */ +/* > \verbatim */ +/* > ANORM is REAL */ +/* > The 1-norm of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > The reciprocal of the condition number of the matrix A, */ +/* > computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ +/* > estimate of the 1-norm of inv(A) computed in this routine. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexHEcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > \verbatim */ +/* > */ +/* > December 2016, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > */ +/* > September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */ +/* > School of Mathematics, */ +/* > University of Manchester */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int checon_rook_(char *uplo, integer *n, complex *a, + integer *lda, integer *ipiv, real *anorm, real *rcond, complex *work, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer kase, i__; + extern logical lsame_(char *, char *); + integer isave[3]; + logical upper; + extern /* Subroutine */ int clacn2_(integer *, complex *, complex *, real + *, integer *, integer *), xerbla_(char *, integer *, ftnlen); + real ainvnm; + extern /* Subroutine */ int chetrs_rook_(char *, integer *, integer *, + complex *, integer *, integer *, complex *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*anorm < 0.f) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHECON_ROOK", &i__1, (ftnlen)11); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.f; + if (*n == 0) { + *rcond = 1.f; + return 0; + } else if (*anorm <= 0.f) { + return 0; + } + +/* Check that the diagonal matrix D is nonsingular. */ + + if (upper) { + +/* Upper triangular storage: examine D from bottom to top */ + + for (i__ = *n; i__ >= 1; --i__) { + i__1 = i__ + i__ * a_dim1; + if (ipiv[i__] > 0 && (a[i__1].r == 0.f && a[i__1].i == 0.f)) { + return 0; + } +/* L10: */ + } + } else { + +/* Lower triangular storage: examine D from top to bottom. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + if (ipiv[i__] > 0 && (a[i__2].r == 0.f && a[i__2].i == 0.f)) { + return 0; + } +/* L20: */ + } + } + +/* Estimate the 1-norm of the inverse. */ + + kase = 0; +L30: + clacn2_(n, &work[*n + 1], &work[1], &ainvnm, &kase, isave); + if (kase != 0) { + +/* Multiply by inv(L*D*L**H) or inv(U*D*U**H). */ + + chetrs_rook_(uplo, n, &c__1, &a[a_offset], lda, &ipiv[1], &work[1], + n, info); + goto L30; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.f) { + *rcond = 1.f / ainvnm / *anorm; + } + + return 0; + +/* End of CHECON_ROOK */ + +} /* checon_rook__ */ + diff --git a/lapack-netlib/SRC/cheequb.c b/lapack-netlib/SRC/cheequb.c new file mode 100644 index 000000000..998699987 --- /dev/null +++ b/lapack-netlib/SRC/cheequb.c @@ -0,0 +1,873 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHEEQUB */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHEEQUB + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHEEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) */ + +/* INTEGER INFO, LDA, N */ +/* REAL AMAX, SCOND */ +/* CHARACTER UPLO */ +/* COMPLEX A( LDA, * ), WORK( * ) */ +/* REAL S( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHEEQUB computes row and column scalings intended to equilibrate a */ +/* > Hermitian matrix A (with respect to the Euclidean norm) and reduce */ +/* > its condition number. The scale factors S are computed by the BIN */ +/* > algorithm (see references) so that the scaled matrix B with elements */ +/* > B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of */ +/* > the smallest possible condition number over all possible diagonal */ +/* > scalings. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > The N-by-N Hermitian matrix whose scaling factors are to be */ +/* > computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is REAL array, dimension (N) */ +/* > If INFO = 0, S contains the scale factors for A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCOND */ +/* > \verbatim */ +/* > SCOND is REAL */ +/* > If INFO = 0, S contains the ratio of the smallest S(i) to */ +/* > the largest S(i). If SCOND >= 0.1 and AMAX is neither too */ +/* > large nor too small, it is not worth scaling by S. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] AMAX */ +/* > \verbatim */ +/* > AMAX is REAL */ +/* > Largest absolute value of any matrix element. If AMAX is */ +/* > very close to overflow or very close to underflow, the */ +/* > matrix should be scaled. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the i-th diagonal element is nonpositive. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup complexHEcomputational */ + +/* > \par References: */ +/* ================ */ +/* > */ +/* > Livne, O.E. and Golub, G.H., "Scaling by Binormalization", \n */ +/* > Numerical Algorithms, vol. 35, no. 1, pp. 97-120, January 2004. \n */ +/* > DOI 10.1023/B:NUMA.0000016606.32820.69 \n */ +/* > Tech report version: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.3.1679 */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int cheequb_(char *uplo, integer *n, complex *a, integer * + lda, real *s, real *scond, real *amax, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + real r__1, r__2, r__3, r__4; + doublereal d__1; + complex q__1, q__2, q__3, q__4; + + /* Local variables */ + real base; + integer iter; + real smin, smax, d__; + integer i__, j; + real t, u, scale; + extern logical lsame_(char *, char *); + real c0, c1, c2, sumsq, si; + logical up; + extern real slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real bignum; + extern /* Subroutine */ int classq_(integer *, complex *, integer *, real + *, real *); + real smlnum, avg, std, tol; + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --s; + --work; + + /* Function Body */ + *info = 0; + if (! (lsame_(uplo, "U") || lsame_(uplo, "L"))) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEEQUB", &i__1, (ftnlen)7); + return 0; + } + up = lsame_(uplo, "U"); + *amax = 0.f; + +/* Quick return if possible. */ + + if (*n == 0) { + *scond = 1.f; + return 0; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + s[i__] = 0.f; + } + *amax = 0.f; + if (up) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + i__3 = i__ + j * a_dim1; + r__3 = s[i__], r__4 = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = + r_imag(&a[i__ + j * a_dim1]), abs(r__2)); + s[i__] = f2cmax(r__3,r__4); +/* Computing MAX */ + i__3 = i__ + j * a_dim1; + r__3 = s[j], r__4 = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = + r_imag(&a[i__ + j * a_dim1]), abs(r__2)); + s[j] = f2cmax(r__3,r__4); +/* Computing MAX */ + i__3 = i__ + j * a_dim1; + r__3 = *amax, r__4 = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = + r_imag(&a[i__ + j * a_dim1]), abs(r__2)); + *amax = f2cmax(r__3,r__4); + } +/* Computing MAX */ + i__2 = j + j * a_dim1; + r__3 = s[j], r__4 = (r__1 = a[i__2].r, abs(r__1)) + (r__2 = + r_imag(&a[j + j * a_dim1]), abs(r__2)); + s[j] = f2cmax(r__3,r__4); +/* Computing MAX */ + i__2 = j + j * a_dim1; + r__3 = *amax, r__4 = (r__1 = a[i__2].r, abs(r__1)) + (r__2 = + r_imag(&a[j + j * a_dim1]), abs(r__2)); + *amax = f2cmax(r__3,r__4); + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j + j * a_dim1; + r__3 = s[j], r__4 = (r__1 = a[i__2].r, abs(r__1)) + (r__2 = + r_imag(&a[j + j * a_dim1]), abs(r__2)); + s[j] = f2cmax(r__3,r__4); +/* Computing MAX */ + i__2 = j + j * a_dim1; + r__3 = *amax, r__4 = (r__1 = a[i__2].r, abs(r__1)) + (r__2 = + r_imag(&a[j + j * a_dim1]), abs(r__2)); + *amax = f2cmax(r__3,r__4); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + i__3 = i__ + j * a_dim1; + r__3 = s[i__], r__4 = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = + r_imag(&a[i__ + j * a_dim1]), abs(r__2)); + s[i__] = f2cmax(r__3,r__4); +/* Computing MAX */ + i__3 = i__ + j * a_dim1; + r__3 = s[j], r__4 = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = + r_imag(&a[i__ + j * a_dim1]), abs(r__2)); + s[j] = f2cmax(r__3,r__4); +/* Computing MAX */ + i__3 = i__ + j * a_dim1; + r__3 = *amax, r__4 = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = + r_imag(&a[i__ + j * a_dim1]), abs(r__2)); + *amax = f2cmax(r__3,r__4); + } + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + s[j] = 1.f / s[j]; + } + tol = 1.f / sqrt(*n * 2.f); + for (iter = 1; iter <= 100; ++iter) { + scale = 0.f; + sumsq = 0.f; +/* beta = |A|s */ + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + work[i__2].r = 0.f, work[i__2].i = 0.f; + } + if (up) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + r__3 = ((r__1 = a[i__5].r, abs(r__1)) + (r__2 = r_imag(&a[ + i__ + j * a_dim1]), abs(r__2))) * s[j]; + q__1.r = work[i__4].r + r__3, q__1.i = work[i__4].i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; + i__3 = j; + i__4 = j; + i__5 = i__ + j * a_dim1; + r__3 = ((r__1 = a[i__5].r, abs(r__1)) + (r__2 = r_imag(&a[ + i__ + j * a_dim1]), abs(r__2))) * s[i__]; + q__1.r = work[i__4].r + r__3, q__1.i = work[i__4].i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; + } + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + r__3 = ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[j + + j * a_dim1]), abs(r__2))) * s[j]; + q__1.r = work[i__3].r + r__3, q__1.i = work[i__3].i; + work[i__2].r = q__1.r, work[i__2].i = q__1.i; + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + r__3 = ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[j + + j * a_dim1]), abs(r__2))) * s[j]; + q__1.r = work[i__3].r + r__3, q__1.i = work[i__3].i; + work[i__2].r = q__1.r, work[i__2].i = q__1.i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + r__3 = ((r__1 = a[i__5].r, abs(r__1)) + (r__2 = r_imag(&a[ + i__ + j * a_dim1]), abs(r__2))) * s[j]; + q__1.r = work[i__4].r + r__3, q__1.i = work[i__4].i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; + i__3 = j; + i__4 = j; + i__5 = i__ + j * a_dim1; + r__3 = ((r__1 = a[i__5].r, abs(r__1)) + (r__2 = r_imag(&a[ + i__ + j * a_dim1]), abs(r__2))) * s[i__]; + q__1.r = work[i__4].r + r__3, q__1.i = work[i__4].i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; + } + } + } +/* avg = s^T beta / n */ + avg = 0.f; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + q__2.r = s[i__2] * work[i__3].r, q__2.i = s[i__2] * work[i__3].i; + q__1.r = avg + q__2.r, q__1.i = q__2.i; + avg = q__1.r; + } + avg /= *n; + std = 0.f; + i__1 = *n << 1; + for (i__ = *n + 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__ - *n; + i__4 = i__ - *n; + q__2.r = s[i__3] * work[i__4].r, q__2.i = s[i__3] * work[i__4].i; + q__1.r = q__2.r - avg, q__1.i = q__2.i; + work[i__2].r = q__1.r, work[i__2].i = q__1.i; + } + classq_(n, &work[*n + 1], &c__1, &scale, &sumsq); + std = scale * sqrt(sumsq / *n); + if (std < tol * avg) { + goto L999; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + t = (r__1 = a[i__2].r, abs(r__1)) + (r__2 = r_imag(&a[i__ + i__ * + a_dim1]), abs(r__2)); + si = s[i__]; + c2 = (*n - 1) * t; + i__2 = *n - 2; + i__3 = i__; + r__1 = t * si; + q__2.r = work[i__3].r - r__1, q__2.i = work[i__3].i; + d__1 = (doublereal) i__2; + q__1.r = d__1 * q__2.r, q__1.i = d__1 * q__2.i; + c1 = q__1.r; + r__1 = -(t * si) * si; + i__2 = i__; + d__1 = 2.; + q__4.r = d__1 * work[i__2].r, q__4.i = d__1 * work[i__2].i; + q__3.r = si * q__4.r, q__3.i = si * q__4.i; + q__2.r = r__1 + q__3.r, q__2.i = q__3.i; + r__2 = *n * avg; + q__1.r = q__2.r - r__2, q__1.i = q__2.i; + c0 = q__1.r; + d__ = c1 * c1 - c0 * 4 * c2; + if (d__ <= 0.f) { + *info = -1; + return 0; + } + si = c0 * -2 / (c1 + sqrt(d__)); + d__ = si - s[i__]; + u = 0.f; + if (up) { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + i__3 = j + i__ * a_dim1; + t = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[j + + i__ * a_dim1]), abs(r__2)); + u += s[j] * t; + i__3 = j; + i__4 = j; + r__1 = d__ * t; + q__1.r = work[i__4].r + r__1, q__1.i = work[i__4].i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + i__3 = i__ + j * a_dim1; + t = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[i__ + + j * a_dim1]), abs(r__2)); + u += s[j] * t; + i__3 = j; + i__4 = j; + r__1 = d__ * t; + q__1.r = work[i__4].r + r__1, q__1.i = work[i__4].i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; + } + } else { + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + i__3 = i__ + j * a_dim1; + t = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[i__ + + j * a_dim1]), abs(r__2)); + u += s[j] * t; + i__3 = j; + i__4 = j; + r__1 = d__ * t; + q__1.r = work[i__4].r + r__1, q__1.i = work[i__4].i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + i__3 = j + i__ * a_dim1; + t = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[j + + i__ * a_dim1]), abs(r__2)); + u += s[j] * t; + i__3 = j; + i__4 = j; + r__1 = d__ * t; + q__1.r = work[i__4].r + r__1, q__1.i = work[i__4].i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; + } + } + i__2 = i__; + q__4.r = u + work[i__2].r, q__4.i = work[i__2].i; + q__3.r = d__ * q__4.r, q__3.i = d__ * q__4.i; + d__1 = (doublereal) (*n); + q__2.r = q__3.r / d__1, q__2.i = q__3.i / d__1; + q__1.r = avg + q__2.r, q__1.i = q__2.i; + avg = q__1.r; + s[i__] = si; + } + } +L999: + smlnum = slamch_("SAFEMIN"); + bignum = 1.f / smlnum; + smin = bignum; + smax = 0.f; + t = 1.f / sqrt(avg); + base = slamch_("B"); + u = 1.f / log(base); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = (integer) (u * log(s[i__] * t)); + s[i__] = pow_ri(&base, &i__2); +/* Computing MIN */ + r__1 = smin, r__2 = s[i__]; + smin = f2cmin(r__1,r__2); +/* Computing MAX */ + r__1 = smax, r__2 = s[i__]; + smax = f2cmax(r__1,r__2); + } + *scond = f2cmax(smin,smlnum) / f2cmin(smax,bignum); + + return 0; +} /* cheequb_ */ + diff --git a/lapack-netlib/SRC/cheev.c b/lapack-netlib/SRC/cheev.c new file mode 100644 index 000000000..34de62193 --- /dev/null +++ b/lapack-netlib/SRC/cheev.c @@ -0,0 +1,721 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHEEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matr +ices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHEEV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, */ +/* INFO ) */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, LDA, LWORK, N */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHEEV computes all eigenvalues and, optionally, eigenvectors of a */ +/* > complex Hermitian matrix A. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > orthonormal eigenvectors of the matrix A. */ +/* > If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */ +/* > or the upper triangle (if UPLO='U') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= f2cmax(1,2*N-1). */ +/* > For optimal efficiency, LWORK >= (NB+1)*N, */ +/* > where NB is the blocksize for CHETRD returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (f2cmax(1, 3*N-2)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the algorithm failed to converge; i */ +/* > off-diagonal elements of an intermediate tridiagonal */ +/* > form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexHEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int cheev_(char *jobz, char *uplo, integer *n, complex *a, + integer *lda, real *w, complex *work, integer *lwork, real *rwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1; + + /* Local variables */ + integer inde; + real anrm; + integer imax; + real rmin, rmax, sigma; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + logical lower, wantz; + integer nb; + extern real clanhe_(char *, char *, integer *, complex *, integer *, real + *); + integer iscale; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *); + extern real slamch_(char *); + extern /* Subroutine */ int chetrd_(char *, integer *, complex *, integer + *, real *, real *, complex *, complex *, integer *, integer *); + real safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real bignum; + integer indtau, indwrk; + extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, + complex *, integer *, real *, integer *), cungtr_(char *, + integer *, complex *, integer *, complex *, complex *, integer *, + integer *), ssterf_(integer *, real *, real *, integer *); + integer llwork; + real smlnum; + integer lwkopt; + logical lquery; + real eps; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --w; + --work; + --rwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1; + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); +/* Computing MAX */ + i__1 = 1, i__2 = (nb + 1) * *n; + lwkopt = f2cmax(i__1,i__2); + work[1].r = (real) lwkopt, work[1].i = 0.f; + +/* Computing MAX */ + i__1 = 1, i__2 = (*n << 1) - 1; + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -8; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEEV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + work[1].r = 1.f, work[1].i = 0.f; + if (wantz) { + i__1 = a_dim1 + 1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = clanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + iscale = 0; + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + clascl_(uplo, &c__0, &c__0, &c_b18, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call CHETRD to reduce Hermitian matrix to tridiagonal form. */ + + inde = 1; + indtau = 1; + indwrk = indtau + *n; + llwork = *lwork - indwrk + 1; + chetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo); + +/* For eigenvalues only, call SSTERF. For eigenvectors, first call */ +/* CUNGTR to generate the unitary matrix, then call CSTEQR. */ + + if (! wantz) { + ssterf_(n, &w[1], &rwork[inde], info); + } else { + cungtr_(uplo, n, &a[a_offset], lda, &work[indtau], &work[indwrk], & + llwork, &iinfo); + indwrk = inde + *n; + csteqr_(jobz, n, &w[1], &rwork[inde], &a[a_offset], lda, &rwork[ + indwrk], info); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } + +/* Set WORK(1) to optimal complex workspace size. */ + + work[1].r = (real) lwkopt, work[1].i = 0.f; + + return 0; + +/* End of CHEEV */ + +} /* cheev_ */ + diff --git a/lapack-netlib/SRC/cheev_2stage.c b/lapack-netlib/SRC/cheev_2stage.c new file mode 100644 index 000000000..039e95b50 --- /dev/null +++ b/lapack-netlib/SRC/cheev_2stage.c @@ -0,0 +1,783 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHEEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for +HE matrices */ + +/* @generated from zheev_2stage.f, fortran z -> c, Sat Nov 5 23:18:06 2016 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHEEV_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, */ +/* RWORK, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, LDA, LWORK, N */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHEEV_2STAGE computes all eigenvalues and, optionally, eigenvectors of a */ +/* > complex Hermitian matrix A using the 2stage technique for */ +/* > the reduction to tridiagonal. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > Not available in this release. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > orthonormal eigenvectors of the matrix A. */ +/* > If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */ +/* > or the upper triangle (if UPLO='U') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1, when N <= 1; */ +/* > otherwise */ +/* > If JOBZ = 'N' and N > 1, LWORK must be queried. */ +/* > LWORK = MAX(1, dimension) where */ +/* > dimension = f2cmax(stage1,stage2) + (KD+1)*N + N */ +/* > = N*KD + N*f2cmax(KD+1,FACTOPTNB) */ +/* > + f2cmax(2*KD*KD, KD*NTHREADS) */ +/* > + (KD+1)*N + N */ +/* > where KD is the blocking size of the reduction, */ +/* > FACTOPTNB is the blocking used by the QR or LQ */ +/* > algorithm, usually FACTOPTNB=128 is a good choice */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (f2cmax(1, 3*N-2)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the algorithm failed to converge; i */ +/* > off-diagonal elements of an intermediate tridiagonal */ +/* > form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complexHEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > All details about the 2stage techniques are available in: */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int cheev_2stage_(char *jobz, char *uplo, integer *n, + complex *a, integer *lda, real *w, complex *work, integer *lwork, + real *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + real r__1; + + /* Local variables */ + integer inde; + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + real anrm; + integer imax; + real rmin, rmax; + extern /* Subroutine */ int chetrd_2stage_(char *, char *, integer *, + complex *, integer *, real *, real *, complex *, complex *, + integer *, complex *, integer *, integer *); + real sigma; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + integer lhtrd, lwmin; + logical lower; + integer lwtrd; + logical wantz; + integer ib, kd; + extern real clanhe_(char *, char *, integer *, complex *, integer *, real + *); + integer iscale; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *); + extern real slamch_(char *); + real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real bignum; + integer indtau, indwrk; + extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, + complex *, integer *, real *, integer *), cungtr_(char *, + integer *, complex *, integer *, complex *, complex *, integer *, + integer *), ssterf_(integer *, real *, real *, integer *); + integer llwork; + real smlnum; + logical lquery; + real eps; + integer indhous; + + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --w; + --work; + --rwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1; + + *info = 0; + if (! lsame_(jobz, "N")) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } + + if (*info == 0) { + kd = ilaenv2stage_(&c__1, "CHETRD_2STAGE", jobz, n, &c_n1, &c_n1, & + c_n1); + ib = ilaenv2stage_(&c__2, "CHETRD_2STAGE", jobz, n, &kd, &c_n1, &c_n1); + lhtrd = ilaenv2stage_(&c__3, "CHETRD_2STAGE", jobz, n, &kd, &ib, & + c_n1); + lwtrd = ilaenv2stage_(&c__4, "CHETRD_2STAGE", jobz, n, &kd, &ib, & + c_n1); + lwmin = *n + lhtrd + lwtrd; + work[1].r = (real) lwmin, work[1].i = 0.f; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEEV_2STAGE ", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + work[1].r = 1.f, work[1].i = 0.f; + if (wantz) { + i__1 = a_dim1 + 1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = clanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + iscale = 0; + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + clascl_(uplo, &c__0, &c__0, &c_b28, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call CHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. */ + + inde = 1; + indtau = 1; + indhous = indtau + *n; + indwrk = indhous + lhtrd; + llwork = *lwork - indwrk + 1; + + chetrd_2stage_(jobz, uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], & + work[indtau], &work[indhous], &lhtrd, &work[indwrk], &llwork, & + iinfo); + +/* For eigenvalues only, call SSTERF. For eigenvectors, first call */ +/* CUNGTR to generate the unitary matrix, then call CSTEQR. */ + + if (! wantz) { + ssterf_(n, &w[1], &rwork[inde], info); + } else { + cungtr_(uplo, n, &a[a_offset], lda, &work[indtau], &work[indwrk], & + llwork, &iinfo); + indwrk = inde + *n; + csteqr_(jobz, n, &w[1], &rwork[inde], &a[a_offset], lda, &rwork[ + indwrk], info); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } + +/* Set WORK(1) to optimal complex workspace size. */ + + work[1].r = (real) lwmin, work[1].i = 0.f; + + return 0; + +/* End of CHEEV_2STAGE */ + +} /* cheev_2stage__ */ + diff --git a/lapack-netlib/SRC/cheevd.c b/lapack-netlib/SRC/cheevd.c new file mode 100644 index 000000000..6c00f11d7 --- /dev/null +++ b/lapack-netlib/SRC/cheevd.c @@ -0,0 +1,826 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHEEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE mat +rices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHEEVD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, */ +/* LRWORK, IWORK, LIWORK, INFO ) */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N */ +/* INTEGER IWORK( * ) */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHEEVD computes all eigenvalues and, optionally, eigenvectors of a */ +/* > complex Hermitian matrix A. If eigenvectors are desired, it uses a */ +/* > divide and conquer algorithm. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > orthonormal eigenvectors of the matrix A. */ +/* > If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */ +/* > or the upper triangle (if UPLO='U') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. */ +/* > If N <= 1, LWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LWORK must be at least N + 1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, */ +/* > dimension (LRWORK) */ +/* > On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The dimension of the array RWORK. */ +/* > If N <= 1, LRWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LRWORK must be at least N. */ +/* > If JOBZ = 'V' and N > 1, LRWORK must be at least */ +/* > 1 + 5*N + 2*N**2. */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > If N <= 1, LIWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LIWORK must be at least 1. */ +/* > If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i and JOBZ = 'N', then the algorithm failed */ +/* > to converge; i off-diagonal elements of an intermediate */ +/* > tridiagonal form did not converge to zero; */ +/* > if INFO = i and JOBZ = 'V', then the algorithm failed */ +/* > to compute an eigenvalue while working on the submatrix */ +/* > lying in rows and columns INFO/(N+1) through */ +/* > mod(INFO,N+1). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexHEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > Modified description of INFO. Sven, 16 Feb 05. */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Jeff Rutter, Computer Science Division, University of California */ +/* > at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int cheevd_(char *jobz, char *uplo, integer *n, complex *a, + integer *lda, real *w, complex *work, integer *lwork, real *rwork, + integer *lrwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1; + + /* Local variables */ + integer inde; + real anrm; + integer imax; + real rmin, rmax; + integer lopt; + real sigma; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + integer lwmin, liopt; + logical lower; + integer llrwk, lropt; + logical wantz; + integer indwk2, llwrk2; + extern real clanhe_(char *, char *, integer *, complex *, integer *, real + *); + integer iscale; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *), cstedc_(char *, integer *, real *, real *, complex *, + integer *, complex *, integer *, real *, integer *, integer *, + integer *, integer *); + extern real slamch_(char *); + extern /* Subroutine */ int chetrd_(char *, integer *, complex *, integer + *, real *, real *, complex *, complex *, integer *, integer *), clacpy_(char *, integer *, integer *, complex *, integer + *, complex *, integer *); + real safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real bignum; + integer indtau, indrwk, indwrk, liwmin; + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); + integer lrwmin; + extern /* Subroutine */ int cunmtr_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + integer llwork; + real smlnum; + logical lquery; + real eps; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --w; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } + + if (*info == 0) { + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + lopt = lwmin; + lropt = lrwmin; + liopt = liwmin; + } else { + if (wantz) { + lwmin = (*n << 1) + *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { + lwmin = *n + 1; + lrwmin = *n; + liwmin = 1; + } +/* Computing MAX */ + i__1 = lwmin, i__2 = *n + ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, + &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lopt = f2cmax(i__1,i__2); + lropt = lrwmin; + liopt = liwmin; + } + work[1].r = (real) lopt, work[1].i = 0.f; + rwork[1] = (real) lropt; + iwork[1] = liopt; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEEVD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + if (wantz) { + i__1 = a_dim1 + 1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = clanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + iscale = 0; + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + clascl_(uplo, &c__0, &c__0, &c_b18, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call CHETRD to reduce Hermitian matrix to tridiagonal form. */ + + inde = 1; + indtau = 1; + indwrk = indtau + *n; + indrwk = inde + *n; + indwk2 = indwrk + *n * *n; + llwork = *lwork - indwrk + 1; + llwrk2 = *lwork - indwk2 + 1; + llrwk = *lrwork - indrwk + 1; + chetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo); + +/* For eigenvalues only, call SSTERF. For eigenvectors, first call */ +/* CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the */ +/* tridiagonal matrix, then call CUNMTR to multiply it to the */ +/* Householder transformations represented as Householder vectors in */ +/* A. */ + + if (! wantz) { + ssterf_(n, &w[1], &rwork[inde], info); + } else { + cstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], + &llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info); + cunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo); + clacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } + + work[1].r = (real) lopt, work[1].i = 0.f; + rwork[1] = (real) lropt; + iwork[1] = liopt; + + return 0; + +/* End of CHEEVD */ + +} /* cheevd_ */ + diff --git a/lapack-netlib/SRC/cheevd_2stage.c b/lapack-netlib/SRC/cheevd_2stage.c new file mode 100644 index 000000000..e198ce8e4 --- /dev/null +++ b/lapack-netlib/SRC/cheevd_2stage.c @@ -0,0 +1,883 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for + HE matrices */ + +/* @generated from zheevd_2stage.f, fortran z -> c, Sat Nov 5 23:18:14 2016 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHEEVD_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, */ +/* RWORK, LRWORK, IWORK, LIWORK, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N */ +/* INTEGER IWORK( * ) */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHEEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a */ +/* > complex Hermitian matrix A using the 2stage technique for */ +/* > the reduction to tridiagonal. If eigenvectors are desired, it uses a */ +/* > divide and conquer algorithm. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > Not available in this release. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > orthonormal eigenvectors of the matrix A. */ +/* > If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */ +/* > or the upper triangle (if UPLO='U') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If N <= 1, LWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LWORK must be queried. */ +/* > LWORK = MAX(1, dimension) where */ +/* > dimension = f2cmax(stage1,stage2) + (KD+1)*N + N+1 */ +/* > = N*KD + N*f2cmax(KD+1,FACTOPTNB) */ +/* > + f2cmax(2*KD*KD, KD*NTHREADS) */ +/* > + (KD+1)*N + N+1 */ +/* > where KD is the blocking size of the reduction, */ +/* > FACTOPTNB is the blocking used by the QR or LQ */ +/* > algorithm, usually FACTOPTNB=128 is a good choice */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2 */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, */ +/* > dimension (LRWORK) */ +/* > On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The dimension of the array RWORK. */ +/* > If N <= 1, LRWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LRWORK must be at least N. */ +/* > If JOBZ = 'V' and N > 1, LRWORK must be at least */ +/* > 1 + 5*N + 2*N**2. */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > If N <= 1, LIWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LIWORK must be at least 1. */ +/* > If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i and JOBZ = 'N', then the algorithm failed */ +/* > to converge; i off-diagonal elements of an intermediate */ +/* > tridiagonal form did not converge to zero; */ +/* > if INFO = i and JOBZ = 'V', then the algorithm failed */ +/* > to compute an eigenvalue while working on the submatrix */ +/* > lying in rows and columns INFO/(N+1) through */ +/* > mod(INFO,N+1). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complexHEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > Modified description of INFO. Sven, 16 Feb 05. */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Jeff Rutter, Computer Science Division, University of California */ +/* > at Berkeley, USA */ +/* > */ +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > All details about the 2stage techniques are available in: */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int cheevd_2stage_(char *jobz, char *uplo, integer *n, + complex *a, integer *lda, real *w, complex *work, integer *lwork, + real *rwork, integer *lrwork, integer *iwork, integer *liwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + real r__1; + + /* Local variables */ + integer inde; + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + real anrm; + integer imax; + real rmin, rmax; + extern /* Subroutine */ int chetrd_2stage_(char *, char *, integer *, + complex *, integer *, real *, real *, complex *, complex *, + integer *, complex *, integer *, integer *); + real sigma; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + integer lhtrd, lwmin; + logical lower; + integer llrwk, lwtrd; + logical wantz; + integer indwk2, ib, llwrk2, kd; + extern real clanhe_(char *, char *, integer *, complex *, integer *, real + *); + integer iscale; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *), cstedc_(char *, integer *, real *, real *, complex *, + integer *, complex *, integer *, real *, integer *, integer *, + integer *, integer *); + extern real slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *); + real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real bignum; + integer indtau, indrwk, indwrk, liwmin; + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); + integer lrwmin; + extern /* Subroutine */ int cunmtr_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + integer llwork; + real smlnum; + logical lquery; + real eps; + integer indhous; + + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --w; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + *info = 0; + if (! lsame_(jobz, "N")) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } + + if (*info == 0) { + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + } else { + kd = ilaenv2stage_(&c__1, "CHETRD_2STAGE", jobz, n, &c_n1, &c_n1, + &c_n1); + ib = ilaenv2stage_(&c__2, "CHETRD_2STAGE", jobz, n, &kd, &c_n1, & + c_n1); + lhtrd = ilaenv2stage_(&c__3, "CHETRD_2STAGE", jobz, n, &kd, &ib, & + c_n1); + lwtrd = ilaenv2stage_(&c__4, "CHETRD_2STAGE", jobz, n, &kd, &ib, & + c_n1); + if (wantz) { + lwmin = (*n << 1) + *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { + lwmin = *n + 1 + lhtrd + lwtrd; + lrwmin = *n; + liwmin = 1; + } + } + work[1].r = (real) lwmin, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEEVD_2STAGE", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + if (wantz) { + i__1 = a_dim1 + 1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = clanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + iscale = 0; + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + clascl_(uplo, &c__0, &c__0, &c_b28, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call CHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. */ + + inde = 1; + indrwk = inde + *n; + llrwk = *lrwork - indrwk + 1; + indtau = 1; + indhous = indtau + *n; + indwrk = indhous + lhtrd; + llwork = *lwork - indwrk + 1; + indwk2 = indwrk + *n * *n; + llwrk2 = *lwork - indwk2 + 1; + + chetrd_2stage_(jobz, uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], & + work[indtau], &work[indhous], &lhtrd, &work[indwrk], &llwork, & + iinfo); + +/* For eigenvalues only, call SSTERF. For eigenvectors, first call */ +/* CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the */ +/* tridiagonal matrix, then call CUNMTR to multiply it to the */ +/* Householder transformations represented as Householder vectors in */ +/* A. */ + + if (! wantz) { + ssterf_(n, &w[1], &rwork[inde], info); + } else { + cstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], + &llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info); + cunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo); + clacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } + + work[1].r = (real) lwmin, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + + return 0; + +/* End of CHEEVD_2STAGE */ + +} /* cheevd_2stage__ */ + diff --git a/lapack-netlib/SRC/cheevr.c b/lapack-netlib/SRC/cheevr.c new file mode 100644 index 000000000..4acdc8ffc --- /dev/null +++ b/lapack-netlib/SRC/cheevr.c @@ -0,0 +1,1184 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHEEVR computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE mat +rices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHEEVR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, */ +/* ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, */ +/* RWORK, LRWORK, IWORK, LIWORK, INFO ) */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, */ +/* $ M, N */ +/* REAL ABSTOL, VL, VU */ +/* INTEGER ISUPPZ( * ), IWORK( * ) */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHEEVR computes selected eigenvalues and, optionally, eigenvectors */ +/* > of a complex Hermitian matrix A. Eigenvalues and eigenvectors can */ +/* > be selected by specifying either a range of values or a range of */ +/* > indices for the desired eigenvalues. */ +/* > */ +/* > CHEEVR first reduces the matrix A to tridiagonal form T with a call */ +/* > to CHETRD. Then, whenever possible, CHEEVR calls CSTEMR to compute */ +/* > the eigenspectrum using Relatively Robust Representations. CSTEMR */ +/* > computes eigenvalues by the dqds algorithm, while orthogonal */ +/* > eigenvectors are computed from various "good" L D L^T representations */ +/* > (also known as Relatively Robust Representations). Gram-Schmidt */ +/* > orthogonalization is avoided as far as possible. More specifically, */ +/* > the various steps of the algorithm are as follows. */ +/* > */ +/* > For each unreduced block (submatrix) of T, */ +/* > (a) Compute T - sigma I = L D L^T, so that L and D */ +/* > define all the wanted eigenvalues to high relative accuracy. */ +/* > This means that small relative changes in the entries of D and L */ +/* > cause only small relative changes in the eigenvalues and */ +/* > eigenvectors. The standard (unfactored) representation of the */ +/* > tridiagonal matrix T does not have this property in general. */ +/* > (b) Compute the eigenvalues to suitable accuracy. */ +/* > If the eigenvectors are desired, the algorithm attains full */ +/* > accuracy of the computed eigenvalues only right before */ +/* > the corresponding vectors have to be computed, see steps c) and d). */ +/* > (c) For each cluster of close eigenvalues, select a new */ +/* > shift close to the cluster, find a new factorization, and refine */ +/* > the shifted eigenvalues to suitable accuracy. */ +/* > (d) For each eigenvalue with a large enough relative separation compute */ +/* > the corresponding eigenvector by forming a rank revealing twisted */ +/* > factorization. Go back to (c) for any clusters that remain. */ +/* > */ +/* > The desired accuracy of the output can be specified by the input */ +/* > parameter ABSTOL. */ +/* > */ +/* > For more details, see DSTEMR's documentation and: */ +/* > - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations */ +/* > to compute orthogonal eigenvectors of symmetric tridiagonal matrices," */ +/* > Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. */ +/* > - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and */ +/* > Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, */ +/* > 2004. Also LAPACK Working Note 154. */ +/* > - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric */ +/* > tridiagonal eigenvalue/eigenvector problem", */ +/* > Computer Science Division Technical Report No. UCB/CSD-97-971, */ +/* > UC Berkeley, May 1997. */ +/* > */ +/* > */ +/* > Note 1 : CHEEVR calls CSTEMR when the full spectrum is requested */ +/* > on machines which conform to the ieee-754 floating point standard. */ +/* > CHEEVR calls SSTEBZ and CSTEIN on non-ieee machines and */ +/* > when partial spectrum requests are made. */ +/* > */ +/* > Normal execution of CSTEMR may create NaNs and infinities and */ +/* > hence may abort due to a floating point exception in environments */ +/* > which do not handle NaNs and infinities in the ieee standard default */ +/* > manner. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all eigenvalues will be found. */ +/* > = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* > will be found. */ +/* > = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* > For RANGE = 'V' or 'I' and IU - IL < N - 1, SSTEBZ and */ +/* > CSTEIN are called */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, the lower triangle (if UPLO='L') or the upper */ +/* > triangle (if UPLO='U') of A, including the diagonal, is */ +/* > destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is REAL */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is REAL */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > smallest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > largest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ABSTOL */ +/* > \verbatim */ +/* > ABSTOL is REAL */ +/* > The absolute error tolerance for the eigenvalues. */ +/* > An approximate eigenvalue is accepted as converged */ +/* > when it is determined to lie in an interval [a,b] */ +/* > of width less than or equal to */ +/* > */ +/* > ABSTOL + EPS * f2cmax( |a|,|b| ) , */ +/* > */ +/* > where EPS is the machine precision. If ABSTOL is less than */ +/* > or equal to zero, then EPS*|T| will be used in its place, */ +/* > where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* > by reducing A to tridiagonal form. */ +/* > */ +/* > See "Computing Small Singular Values of Bidiagonal Matrices */ +/* > with Guaranteed High Relative Accuracy," by Demmel and */ +/* > Kahan, LAPACK Working Note #3. */ +/* > */ +/* > If high relative accuracy is important, set ABSTOL to */ +/* > SLAMCH( 'Safe minimum' ). Doing so will guarantee that */ +/* > eigenvalues are computed to high relative accuracy when */ +/* > possible in future releases. The current code does not */ +/* > make any guarantees about high relative accuracy, but */ +/* > future releases will. See J. Barlow and J. Demmel, */ +/* > "Computing Accurate Eigensystems of Scaled Diagonally */ +/* > Dominant Matrices", LAPACK Working Note #7, for a discussion */ +/* > of which matrices define their eigenvalues to high relative */ +/* > accuracy. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The total number of eigenvalues found. 0 <= M <= N. */ +/* > If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > The first M elements contain the selected eigenvalues in */ +/* > ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX array, dimension (LDZ, f2cmax(1,M)) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* > contain the orthonormal eigenvectors of the matrix A */ +/* > corresponding to the selected eigenvalues, with the i-th */ +/* > column of Z holding the eigenvector associated with W(i). */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > Note: the user must ensure that at least f2cmax(1,M) columns are */ +/* > supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* > is not known in advance and an upper bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ISUPPZ */ +/* > \verbatim */ +/* > ISUPPZ is INTEGER array, dimension ( 2*f2cmax(1,M) ) */ +/* > The support of the eigenvectors in Z, i.e., the indices */ +/* > indicating the nonzero elements in Z. The i-th eigenvector */ +/* > is nonzero only in elements ISUPPZ( 2*i-1 ) through */ +/* > ISUPPZ( 2*i ). This is an output of CSTEMR (tridiagonal */ +/* > matrix). The support of the eigenvectors of A is typically */ +/* > 1:N because of the unitary transformations applied by CUNMTR. */ +/* > Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= f2cmax(1,2*N). */ +/* > For optimal efficiency, LWORK >= (NB+1)*N, */ +/* > where NB is the f2cmax of the blocksize for CHETRD and for */ +/* > CUNMTR as returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (MAX(1,LRWORK)) */ +/* > On exit, if INFO = 0, RWORK(1) returns the optimal */ +/* > (and minimal) LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The length of the array RWORK. LRWORK >= f2cmax(1,24*N). */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal */ +/* > (and minimal) LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. LIWORK >= f2cmax(1,10*N). */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: Internal error */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complexHEeigen */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Inderjit Dhillon, IBM Almaden, USA \n */ +/* > Osni Marques, LBNL/NERSC, USA \n */ +/* > Ken Stanley, Computer Science Division, University of */ +/* > California at Berkeley, USA \n */ +/* > Jason Riedy, Computer Science Division, University of */ +/* > California at Berkeley, USA \n */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int cheevr_(char *jobz, char *range, char *uplo, integer *n, + complex *a, integer *lda, real *vl, real *vu, integer *il, integer * + iu, real *abstol, integer *m, real *w, complex *z__, integer *ldz, + integer *isuppz, complex *work, integer *lwork, real *rwork, integer * + lrwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + real anrm; + integer imax; + real rmin, rmax; + logical test; + integer itmp1, i__, j, indrd, indre; + real sigma; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + char order[1]; + integer indwk; + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + integer lwmin; + logical lower; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + logical wantz; + integer nb, jj; + logical alleig, indeig; + integer iscale, ieeeok, indibl, indrdd, indifl, indree; + logical valeig; + extern real slamch_(char *); + extern /* Subroutine */ int chetrd_(char *, integer *, complex *, integer + *, real *, real *, complex *, complex *, integer *, integer *), csscal_(integer *, real *, complex *, integer *); + real safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real abstll, bignum; + integer indtau, indisp; + extern /* Subroutine */ int cstein_(integer *, real *, real *, integer *, + real *, integer *, integer *, complex *, integer *, real *, + integer *, integer *, integer *); + integer indiwo, indwkn; + extern real clansy_(char *, char *, integer *, complex *, integer *, real + *); + extern /* Subroutine */ int cstemr_(char *, char *, integer *, real *, + real *, real *, real *, integer *, integer *, integer *, real *, + complex *, integer *, integer *, integer *, logical *, real *, + integer *, integer *, integer *, integer *); + integer indrwk, liwmin; + logical tryrac; + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); + integer lrwmin, llwrkn, llwork, nsplit; + real smlnum; + extern /* Subroutine */ int cunmtr_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *), sstebz_( + char *, char *, integer *, real *, real *, integer *, integer *, + real *, real *, real *, integer *, integer *, real *, integer *, + integer *, real *, integer *, integer *); + logical lquery; + integer lwkopt; + real eps, vll, vuu; + integer llrwork; + real tmp1; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --isuppz; + --work; + --rwork; + --iwork; + + /* Function Body */ + ieeeok = ilaenv_(&c__10, "CHEEVR", "N", &c__1, &c__2, &c__3, &c__4, ( + ftnlen)6, (ftnlen)1); + + lower = lsame_(uplo, "L"); + wantz = lsame_(jobz, "V"); + alleig = lsame_(range, "A"); + valeig = lsame_(range, "V"); + indeig = lsame_(range, "I"); + + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + +/* Computing MAX */ + i__1 = 1, i__2 = *n * 24; + lrwmin = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = *n * 10; + liwmin = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + lwmin = f2cmax(i__1,i__2); + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -8; + } + } else if (indeig) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -9; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -10; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -15; + } + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__1, "CUNMTR", uplo, n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nb = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = (nb + 1) * *n; + lwkopt = f2cmax(i__1,lwmin); + work[1].r = (real) lwkopt, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -18; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -20; + } else if (*liwork < liwmin && ! lquery) { + *info = -22; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEEVR", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + if (*n == 1) { + work[1].r = 2.f, work[1].i = 0.f; + if (alleig || indeig) { + *m = 1; + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + } else { + i__1 = a_dim1 + 1; + i__2 = a_dim1 + 1; + if (*vl < a[i__1].r && *vu >= a[i__2].r) { + *m = 1; + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + } + } + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1.f, z__[i__1].i = 0.f; + isuppz[1] = 1; + isuppz[2] = 1; + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + r__1 = sqrt(bignum), r__2 = 1.f / sqrt(sqrt(safmin)); + rmax = f2cmin(r__1,r__2); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + abstll = *abstol; + if (valeig) { + vll = *vl; + vuu = *vu; + } + anrm = clansy_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j + 1; + csscal_(&i__2, &sigma, &a[j + j * a_dim1], &c__1); +/* L10: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + csscal_(&j, &sigma, &a[j * a_dim1 + 1], &c__1); +/* L20: */ + } + } + if (*abstol > 0.f) { + abstll = *abstol * sigma; + } + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } +/* Initialize indices into workspaces. Note: The IWORK indices are */ +/* used only if SSTERF or CSTEMR fail. */ +/* WORK(INDTAU:INDTAU+N-1) stores the complex scalar factors of the */ +/* elementary reflectors used in CHETRD. */ + indtau = 1; +/* INDWK is the starting offset of the remaining complex workspace, */ +/* and LLWORK is the remaining complex workspace size. */ + indwk = indtau + *n; + llwork = *lwork - indwk + 1; +/* RWORK(INDRD:INDRD+N-1) stores the real tridiagonal's diagonal */ +/* entries. */ + indrd = 1; +/* RWORK(INDRE:INDRE+N-1) stores the off-diagonal entries of the */ +/* tridiagonal matrix from CHETRD. */ + indre = indrd + *n; +/* RWORK(INDRDD:INDRDD+N-1) is a copy of the diagonal entries over */ +/* -written by CSTEMR (the SSTERF path copies the diagonal to W). */ + indrdd = indre + *n; +/* RWORK(INDREE:INDREE+N-1) is a copy of the off-diagonal entries over */ +/* -written while computing the eigenvalues in SSTERF and CSTEMR. */ + indree = indrdd + *n; +/* INDRWK is the starting offset of the left-over real workspace, and */ +/* LLRWORK is the remaining workspace size. */ + indrwk = indree + *n; + llrwork = *lrwork - indrwk + 1; +/* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in SSTEBZ and */ +/* stores the block indices of each of the M<=N eigenvalues. */ + indibl = 1; +/* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in SSTEBZ and */ +/* stores the starting and finishing indices of each block. */ + indisp = indibl + *n; +/* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors */ +/* that corresponding to eigenvectors that fail to converge in */ +/* SSTEIN. This information is discarded; if any fail, the driver */ +/* returns INFO > 0. */ + indifl = indisp + *n; +/* INDIWO is the offset of the remaining integer workspace. */ + indiwo = indifl + *n; + +/* Call CHETRD to reduce Hermitian matrix to tridiagonal form. */ + + chetrd_(uplo, n, &a[a_offset], lda, &rwork[indrd], &rwork[indre], &work[ + indtau], &work[indwk], &llwork, &iinfo); + +/* If all eigenvalues are desired */ +/* then call SSTERF or CSTEMR and CUNMTR. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && ieeeok == 1) { + if (! wantz) { + scopy_(n, &rwork[indrd], &c__1, &w[1], &c__1); + i__1 = *n - 1; + scopy_(&i__1, &rwork[indre], &c__1, &rwork[indree], &c__1); + ssterf_(n, &w[1], &rwork[indree], info); + } else { + i__1 = *n - 1; + scopy_(&i__1, &rwork[indre], &c__1, &rwork[indree], &c__1); + scopy_(n, &rwork[indrd], &c__1, &rwork[indrdd], &c__1); + + if (*abstol <= *n * 2.f * eps) { + tryrac = TRUE_; + } else { + tryrac = FALSE_; + } + cstemr_(jobz, "A", n, &rwork[indrdd], &rwork[indree], vl, vu, il, + iu, m, &w[1], &z__[z_offset], ldz, n, &isuppz[1], &tryrac, + &rwork[indrwk], &llrwork, &iwork[1], liwork, info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by CSTEMR. */ + + if (wantz && *info == 0) { + indwkn = indwk; + llwrkn = *lwork - indwkn + 1; + cunmtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau] + , &z__[z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); + } + } + + + if (*info == 0) { + *m = *n; + goto L30; + } + *info = 0; + } + +/* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. */ +/* Also call SSTEBZ and CSTEIN if CSTEMR fails. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + sstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &rwork[indrd], & + rwork[indre], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], & + rwork[indrwk], &iwork[indiwo], info); + + if (wantz) { + cstein_(n, &rwork[indrd], &rwork[indre], m, &w[1], &iwork[indibl], & + iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[ + indiwo], &iwork[indifl], info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by CSTEIN. */ + + indwkn = indwk; + llwrkn = *lwork - indwkn + 1; + cunmtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau], &z__[ + z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +L30: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L40: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + } +/* L50: */ + } + } + +/* Set WORK(1) to optimal workspace size. */ + + work[1].r = (real) lwkopt, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + + return 0; + +/* End of CHEEVR */ + +} /* cheevr_ */ + diff --git a/lapack-netlib/SRC/cheevr_2stage.c b/lapack-netlib/SRC/cheevr_2stage.c new file mode 100644 index 000000000..8cb50273e --- /dev/null +++ b/lapack-netlib/SRC/cheevr_2stage.c @@ -0,0 +1,1235 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHEEVR_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for + HE matrices */ + +/* @generated from zheevr_2stage.f, fortran z -> c, Sat Nov 5 23:18:11 2016 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHEEVR_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHEEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, */ +/* IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, */ +/* WORK, LWORK, RWORK, LRWORK, IWORK, */ +/* LIWORK, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, */ +/* $ M, N */ +/* REAL ABSTOL, VL, VU */ +/* INTEGER ISUPPZ( * ), IWORK( * ) */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHEEVR_2STAGE computes selected eigenvalues and, optionally, eigenvectors */ +/* > of a complex Hermitian matrix A using the 2stage technique for */ +/* > the reduction to tridiagonal. Eigenvalues and eigenvectors can */ +/* > be selected by specifying either a range of values or a range of */ +/* > indices for the desired eigenvalues. */ +/* > */ +/* > CHEEVR_2STAGE first reduces the matrix A to tridiagonal form T with a call */ +/* > to CHETRD. Then, whenever possible, CHEEVR_2STAGE calls CSTEMR to compute */ +/* > eigenspectrum using Relatively Robust Representations. CSTEMR */ +/* > computes eigenvalues by the dqds algorithm, while orthogonal */ +/* > eigenvectors are computed from various "good" L D L^T representations */ +/* > (also known as Relatively Robust Representations). Gram-Schmidt */ +/* > orthogonalization is avoided as far as possible. More specifically, */ +/* > the various steps of the algorithm are as follows. */ +/* > */ +/* > For each unreduced block (submatrix) of T, */ +/* > (a) Compute T - sigma I = L D L^T, so that L and D */ +/* > define all the wanted eigenvalues to high relative accuracy. */ +/* > This means that small relative changes in the entries of D and L */ +/* > cause only small relative changes in the eigenvalues and */ +/* > eigenvectors. The standard (unfactored) representation of the */ +/* > tridiagonal matrix T does not have this property in general. */ +/* > (b) Compute the eigenvalues to suitable accuracy. */ +/* > If the eigenvectors are desired, the algorithm attains full */ +/* > accuracy of the computed eigenvalues only right before */ +/* > the corresponding vectors have to be computed, see steps c) and d). */ +/* > (c) For each cluster of close eigenvalues, select a new */ +/* > shift close to the cluster, find a new factorization, and refine */ +/* > the shifted eigenvalues to suitable accuracy. */ +/* > (d) For each eigenvalue with a large enough relative separation compute */ +/* > the corresponding eigenvector by forming a rank revealing twisted */ +/* > factorization. Go back to (c) for any clusters that remain. */ +/* > */ +/* > The desired accuracy of the output can be specified by the input */ +/* > parameter ABSTOL. */ +/* > */ +/* > For more details, see DSTEMR's documentation and: */ +/* > - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations */ +/* > to compute orthogonal eigenvectors of symmetric tridiagonal matrices," */ +/* > Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. */ +/* > - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and */ +/* > Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, */ +/* > 2004. Also LAPACK Working Note 154. */ +/* > - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric */ +/* > tridiagonal eigenvalue/eigenvector problem", */ +/* > Computer Science Division Technical Report No. UCB/CSD-97-971, */ +/* > UC Berkeley, May 1997. */ +/* > */ +/* > */ +/* > Note 1 : CHEEVR_2STAGE calls CSTEMR when the full spectrum is requested */ +/* > on machines which conform to the ieee-754 floating point standard. */ +/* > CHEEVR_2STAGE calls SSTEBZ and CSTEIN on non-ieee machines and */ +/* > when partial spectrum requests are made. */ +/* > */ +/* > Normal execution of CSTEMR may create NaNs and infinities and */ +/* > hence may abort due to a floating point exception in environments */ +/* > which do not handle NaNs and infinities in the ieee standard default */ +/* > manner. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > Not available in this release. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all eigenvalues will be found. */ +/* > = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* > will be found. */ +/* > = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* > For RANGE = 'V' or 'I' and IU - IL < N - 1, SSTEBZ and */ +/* > CSTEIN are called */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, the lower triangle (if UPLO='L') or the upper */ +/* > triangle (if UPLO='U') of A, including the diagonal, is */ +/* > destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is REAL */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is REAL */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > smallest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > largest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ABSTOL */ +/* > \verbatim */ +/* > ABSTOL is REAL */ +/* > The absolute error tolerance for the eigenvalues. */ +/* > An approximate eigenvalue is accepted as converged */ +/* > when it is determined to lie in an interval [a,b] */ +/* > of width less than or equal to */ +/* > */ +/* > ABSTOL + EPS * f2cmax( |a|,|b| ) , */ +/* > */ +/* > where EPS is the machine precision. If ABSTOL is less than */ +/* > or equal to zero, then EPS*|T| will be used in its place, */ +/* > where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* > by reducing A to tridiagonal form. */ +/* > */ +/* > See "Computing Small Singular Values of Bidiagonal Matrices */ +/* > with Guaranteed High Relative Accuracy," by Demmel and */ +/* > Kahan, LAPACK Working Note #3. */ +/* > */ +/* > If high relative accuracy is important, set ABSTOL to */ +/* > SLAMCH( 'Safe minimum' ). Doing so will guarantee that */ +/* > eigenvalues are computed to high relative accuracy when */ +/* > possible in future releases. The current code does not */ +/* > make any guarantees about high relative accuracy, but */ +/* > future releases will. See J. Barlow and J. Demmel, */ +/* > "Computing Accurate Eigensystems of Scaled Diagonally */ +/* > Dominant Matrices", LAPACK Working Note #7, for a discussion */ +/* > of which matrices define their eigenvalues to high relative */ +/* > accuracy. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The total number of eigenvalues found. 0 <= M <= N. */ +/* > If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > The first M elements contain the selected eigenvalues in */ +/* > ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX array, dimension (LDZ, f2cmax(1,M)) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* > contain the orthonormal eigenvectors of the matrix A */ +/* > corresponding to the selected eigenvalues, with the i-th */ +/* > column of Z holding the eigenvector associated with W(i). */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > Note: the user must ensure that at least f2cmax(1,M) columns are */ +/* > supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* > is not known in advance and an upper bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ISUPPZ */ +/* > \verbatim */ +/* > ISUPPZ is INTEGER array, dimension ( 2*f2cmax(1,M) ) */ +/* > The support of the eigenvectors in Z, i.e., the indices */ +/* > indicating the nonzero elements in Z. The i-th eigenvector */ +/* > is nonzero only in elements ISUPPZ( 2*i-1 ) through */ +/* > ISUPPZ( 2*i ). This is an output of CSTEMR (tridiagonal */ +/* > matrix). The support of the eigenvectors of A is typically */ +/* > 1:N because of the unitary transformations applied by CUNMTR. */ +/* > Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If JOBZ = 'N' and N > 1, LWORK must be queried. */ +/* > LWORK = MAX(1, 26*N, dimension) where */ +/* > dimension = f2cmax(stage1,stage2) + (KD+1)*N + N */ +/* > = N*KD + N*f2cmax(KD+1,FACTOPTNB) */ +/* > + f2cmax(2*KD*KD, KD*NTHREADS) */ +/* > + (KD+1)*N + N */ +/* > where KD is the blocking size of the reduction, */ +/* > FACTOPTNB is the blocking used by the QR or LQ */ +/* > algorithm, usually FACTOPTNB=128 is a good choice */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (MAX(1,LRWORK)) */ +/* > On exit, if INFO = 0, RWORK(1) returns the optimal */ +/* > (and minimal) LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The length of the array RWORK. LRWORK >= f2cmax(1,24*N). */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal */ +/* > (and minimal) LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. LIWORK >= f2cmax(1,10*N). */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: Internal error */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complexHEeigen */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Inderjit Dhillon, IBM Almaden, USA \n */ +/* > Osni Marques, LBNL/NERSC, USA \n */ +/* > Ken Stanley, Computer Science Division, University of */ +/* > California at Berkeley, USA \n */ +/* > Jason Riedy, Computer Science Division, University of */ +/* > California at Berkeley, USA \n */ +/* > */ +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > All details about the 2stage techniques are available in: */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int cheevr_2stage_(char *jobz, char *range, char *uplo, + integer *n, complex *a, integer *lda, real *vl, real *vu, integer *il, + integer *iu, real *abstol, integer *m, real *w, complex *z__, + integer *ldz, integer *isuppz, complex *work, integer *lwork, real * + rwork, integer *lrwork, integer *iwork, integer *liwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + real anrm; + integer imax; + real rmin, rmax; + logical test; + integer itmp1, i__, j; + extern /* Subroutine */ int chetrd_2stage_(char *, char *, integer *, + complex *, integer *, real *, real *, complex *, complex *, + integer *, complex *, integer *, integer *); + integer indrd, indre; + real sigma; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + char order[1]; + integer indwk, lhtrd; + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + integer lwmin; + logical lower; + integer lwtrd; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + logical wantz; + integer ib, kd, jj; + logical alleig, indeig; + integer iscale, ieeeok, indibl, indrdd, indifl, indree; + logical valeig; + extern real slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *); + real safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real abstll, bignum; + integer indtau, indisp; + extern /* Subroutine */ int cstein_(integer *, real *, real *, integer *, + real *, integer *, integer *, complex *, integer *, real *, + integer *, integer *, integer *); + integer indiwo, indwkn; + extern real clansy_(char *, char *, integer *, complex *, integer *, real + *); + extern /* Subroutine */ int cstemr_(char *, char *, integer *, real *, + real *, real *, real *, integer *, integer *, integer *, real *, + complex *, integer *, integer *, integer *, logical *, real *, + integer *, integer *, integer *, integer *); + integer indrwk, liwmin; + logical tryrac; + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); + integer lrwmin, llwrkn, llwork, nsplit; + real smlnum; + extern /* Subroutine */ int sstebz_(char *, char *, integer *, real *, + real *, integer *, integer *, real *, real *, real *, integer *, + integer *, real *, integer *, integer *, real *, integer *, + integer *), cunmtr_(char *, char *, char *, + integer *, integer *, complex *, integer *, complex *, complex *, + integer *, complex *, integer *, integer *); + logical lquery; + real eps, vll, vuu; + integer indhous, llrwork; + real tmp1; + + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --isuppz; + --work; + --rwork; + --iwork; + + /* Function Body */ + ieeeok = ilaenv_(&c__10, "CHEEVR", "N", &c__1, &c__2, &c__3, &c__4, ( + ftnlen)6, (ftnlen)1); + + lower = lsame_(uplo, "L"); + wantz = lsame_(jobz, "V"); + alleig = lsame_(range, "A"); + valeig = lsame_(range, "V"); + indeig = lsame_(range, "I"); + + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + kd = ilaenv2stage_(&c__1, "CHETRD_2STAGE", jobz, n, &c_n1, &c_n1, &c_n1); + ib = ilaenv2stage_(&c__2, "CHETRD_2STAGE", jobz, n, &kd, &c_n1, &c_n1); + lhtrd = ilaenv2stage_(&c__3, "CHETRD_2STAGE", jobz, n, &kd, &ib, &c_n1); + lwtrd = ilaenv2stage_(&c__4, "CHETRD_2STAGE", jobz, n, &kd, &ib, &c_n1); + lwmin = *n + lhtrd + lwtrd; +/* Computing MAX */ + i__1 = 1, i__2 = *n * 24; + lrwmin = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = *n * 10; + liwmin = f2cmax(i__1,i__2); + + *info = 0; + if (! lsame_(jobz, "N")) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -8; + } + } else if (indeig) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -9; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -10; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -15; + } + } + + if (*info == 0) { + work[1].r = (real) lwmin, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -18; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -20; + } else if (*liwork < liwmin && ! lquery) { + *info = -22; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEEVR_2STAGE", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + if (*n == 1) { + work[1].r = 2.f, work[1].i = 0.f; + if (alleig || indeig) { + *m = 1; + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + } else { + i__1 = a_dim1 + 1; + i__2 = a_dim1 + 1; + if (*vl < a[i__1].r && *vu >= a[i__2].r) { + *m = 1; + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + } + } + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1.f, z__[i__1].i = 0.f; + isuppz[1] = 1; + isuppz[2] = 1; + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + r__1 = sqrt(bignum), r__2 = 1.f / sqrt(sqrt(safmin)); + rmax = f2cmin(r__1,r__2); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + abstll = *abstol; + if (valeig) { + vll = *vl; + vuu = *vu; + } + anrm = clansy_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j + 1; + csscal_(&i__2, &sigma, &a[j + j * a_dim1], &c__1); +/* L10: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + csscal_(&j, &sigma, &a[j * a_dim1 + 1], &c__1); +/* L20: */ + } + } + if (*abstol > 0.f) { + abstll = *abstol * sigma; + } + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } +/* Initialize indices into workspaces. Note: The IWORK indices are */ +/* used only if SSTERF or CSTEMR fail. */ +/* WORK(INDTAU:INDTAU+N-1) stores the complex scalar factors of the */ +/* elementary reflectors used in CHETRD. */ + indtau = 1; +/* INDWK is the starting offset of the remaining complex workspace, */ +/* and LLWORK is the remaining complex workspace size. */ + indhous = indtau + *n; + indwk = indhous + lhtrd; + llwork = *lwork - indwk + 1; +/* RWORK(INDRD:INDRD+N-1) stores the real tridiagonal's diagonal */ +/* entries. */ + indrd = 1; +/* RWORK(INDRE:INDRE+N-1) stores the off-diagonal entries of the */ +/* tridiagonal matrix from CHETRD. */ + indre = indrd + *n; +/* RWORK(INDRDD:INDRDD+N-1) is a copy of the diagonal entries over */ +/* -written by CSTEMR (the SSTERF path copies the diagonal to W). */ + indrdd = indre + *n; +/* RWORK(INDREE:INDREE+N-1) is a copy of the off-diagonal entries over */ +/* -written while computing the eigenvalues in SSTERF and CSTEMR. */ + indree = indrdd + *n; +/* INDRWK is the starting offset of the left-over real workspace, and */ +/* LLRWORK is the remaining workspace size. */ + indrwk = indree + *n; + llrwork = *lrwork - indrwk + 1; +/* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in SSTEBZ and */ +/* stores the block indices of each of the M<=N eigenvalues. */ + indibl = 1; +/* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in SSTEBZ and */ +/* stores the starting and finishing indices of each block. */ + indisp = indibl + *n; +/* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors */ +/* that corresponding to eigenvectors that fail to converge in */ +/* CSTEIN. This information is discarded; if any fail, the driver */ +/* returns INFO > 0. */ + indifl = indisp + *n; +/* INDIWO is the offset of the remaining integer workspace. */ + indiwo = indifl + *n; + +/* Call CHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. */ + + chetrd_2stage_(jobz, uplo, n, &a[a_offset], lda, &rwork[indrd], &rwork[ + indre], &work[indtau], &work[indhous], &lhtrd, &work[indwk], & + llwork, &iinfo); + +/* If all eigenvalues are desired */ +/* then call SSTERF or CSTEMR and CUNMTR. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && ieeeok == 1) { + if (! wantz) { + scopy_(n, &rwork[indrd], &c__1, &w[1], &c__1); + i__1 = *n - 1; + scopy_(&i__1, &rwork[indre], &c__1, &rwork[indree], &c__1); + ssterf_(n, &w[1], &rwork[indree], info); + } else { + i__1 = *n - 1; + scopy_(&i__1, &rwork[indre], &c__1, &rwork[indree], &c__1); + scopy_(n, &rwork[indrd], &c__1, &rwork[indrdd], &c__1); + + if (*abstol <= *n * 2.f * eps) { + tryrac = TRUE_; + } else { + tryrac = FALSE_; + } + cstemr_(jobz, "A", n, &rwork[indrdd], &rwork[indree], vl, vu, il, + iu, m, &w[1], &z__[z_offset], ldz, n, &isuppz[1], &tryrac, + &rwork[indrwk], &llrwork, &iwork[1], liwork, info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by CSTEMR. */ + + if (wantz && *info == 0) { + indwkn = indwk; + llwrkn = *lwork - indwkn + 1; + cunmtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau] + , &z__[z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); + } + } + + + if (*info == 0) { + *m = *n; + goto L30; + } + *info = 0; + } + +/* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. */ +/* Also call SSTEBZ and CSTEIN if CSTEMR fails. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + sstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &rwork[indrd], & + rwork[indre], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], & + rwork[indrwk], &iwork[indiwo], info); + + if (wantz) { + cstein_(n, &rwork[indrd], &rwork[indre], m, &w[1], &iwork[indibl], & + iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[ + indiwo], &iwork[indifl], info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by CSTEIN. */ + + indwkn = indwk; + llwrkn = *lwork - indwkn + 1; + cunmtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau], &z__[ + z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +L30: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L40: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + } +/* L50: */ + } + } + +/* Set WORK(1) to optimal workspace size. */ + + work[1].r = (real) lwmin, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + + return 0; + +/* End of CHEEVR_2STAGE */ + +} /* cheevr_2stage__ */ + diff --git a/lapack-netlib/SRC/cheevx.c b/lapack-netlib/SRC/cheevx.c new file mode 100644 index 000000000..b932d34dd --- /dev/null +++ b/lapack-netlib/SRC/cheevx.c @@ -0,0 +1,1025 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE mat +rices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHEEVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, */ +/* ABSTOL, M, W, Z, LDZ, WORK, LWORK, RWORK, */ +/* IWORK, IFAIL, INFO ) */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N */ +/* REAL ABSTOL, VL, VU */ +/* INTEGER IFAIL( * ), IWORK( * ) */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHEEVX computes selected eigenvalues and, optionally, eigenvectors */ +/* > of a complex Hermitian matrix A. Eigenvalues and eigenvectors can */ +/* > be selected by specifying either a range of values or a range of */ +/* > indices for the desired eigenvalues. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all eigenvalues will be found. */ +/* > = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* > will be found. */ +/* > = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, the lower triangle (if UPLO='L') or the upper */ +/* > triangle (if UPLO='U') of A, including the diagonal, is */ +/* > destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is REAL */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is REAL */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > smallest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > largest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ABSTOL */ +/* > \verbatim */ +/* > ABSTOL is REAL */ +/* > The absolute error tolerance for the eigenvalues. */ +/* > An approximate eigenvalue is accepted as converged */ +/* > when it is determined to lie in an interval [a,b] */ +/* > of width less than or equal to */ +/* > */ +/* > ABSTOL + EPS * f2cmax( |a|,|b| ) , */ +/* > */ +/* > where EPS is the machine precision. If ABSTOL is less than */ +/* > or equal to zero, then EPS*|T| will be used in its place, */ +/* > where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* > by reducing A to tridiagonal form. */ +/* > */ +/* > Eigenvalues will be computed most accurately when ABSTOL is */ +/* > set to twice the underflow threshold 2*SLAMCH('S'), not zero. */ +/* > If this routine returns with INFO>0, indicating that some */ +/* > eigenvectors did not converge, try setting ABSTOL to */ +/* > 2*SLAMCH('S'). */ +/* > */ +/* > See "Computing Small Singular Values of Bidiagonal Matrices */ +/* > with Guaranteed High Relative Accuracy," by Demmel and */ +/* > Kahan, LAPACK Working Note #3. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The total number of eigenvalues found. 0 <= M <= N. */ +/* > If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > On normal exit, the first M elements contain the selected */ +/* > eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX array, dimension (LDZ, f2cmax(1,M)) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* > contain the orthonormal eigenvectors of the matrix A */ +/* > corresponding to the selected eigenvalues, with the i-th */ +/* > column of Z holding the eigenvector associated with W(i). */ +/* > If an eigenvector fails to converge, then that column of Z */ +/* > contains the latest approximation to the eigenvector, and the */ +/* > index of the eigenvector is returned in IFAIL. */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > Note: the user must ensure that at least f2cmax(1,M) columns are */ +/* > supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* > is not known in advance and an upper bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1, when N <= 1; */ +/* > otherwise 2*N. */ +/* > For optimal efficiency, LWORK >= (NB+1)*N, */ +/* > where NB is the f2cmax of the blocksize for CHETRD and for */ +/* > CUNMTR as returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (7*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (5*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IFAIL */ +/* > \verbatim */ +/* > IFAIL is INTEGER array, dimension (N) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* > IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* > indices of the eigenvectors that failed to converge. */ +/* > If JOBZ = 'N', then IFAIL is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, then i eigenvectors failed to converge. */ +/* > Their indices are stored in array IFAIL. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complexHEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int cheevx_(char *jobz, char *range, char *uplo, integer *n, + complex *a, integer *lda, real *vl, real *vu, integer *il, integer * + iu, real *abstol, integer *m, real *w, complex *z__, integer *ldz, + complex *work, integer *lwork, real *rwork, integer *iwork, integer * + ifail, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + integer indd, inde; + real anrm; + integer imax; + real rmin, rmax; + logical test; + integer itmp1, i__, j, indee; + real sigma; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + char order[1]; + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + logical lower; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + logical wantz; + integer nb, jj; + extern real clanhe_(char *, char *, integer *, complex *, integer *, real + *); + logical alleig, indeig; + integer iscale, indibl; + logical valeig; + extern real slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), chetrd_(char *, integer *, complex *, integer *, real *, real + *, complex *, complex *, integer *, integer *), clacpy_( + char *, integer *, integer *, complex *, integer *, complex *, + integer *); + real safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real abstll, bignum; + integer indiwk, indisp, indtau; + extern /* Subroutine */ int cstein_(integer *, real *, real *, integer *, + real *, integer *, integer *, complex *, integer *, real *, + integer *, integer *, integer *); + integer indrwk, indwrk, lwkmin; + extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, + complex *, integer *, real *, integer *), cungtr_(char *, + integer *, complex *, integer *, complex *, complex *, integer *, + integer *), ssterf_(integer *, real *, real *, integer *), + cunmtr_(char *, char *, char *, integer *, integer *, complex *, + integer *, complex *, complex *, integer *, complex *, integer *, + integer *); + integer nsplit, llwork; + real smlnum; + extern /* Subroutine */ int sstebz_(char *, char *, integer *, real *, + real *, integer *, integer *, real *, real *, real *, integer *, + integer *, real *, integer *, integer *, real *, integer *, + integer *); + integer lwkopt; + logical lquery; + real eps, vll, vuu, tmp1; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + --ifail; + + /* Function Body */ + lower = lsame_(uplo, "L"); + wantz = lsame_(jobz, "V"); + alleig = lsame_(range, "A"); + valeig = lsame_(range, "V"); + indeig = lsame_(range, "I"); + lquery = *lwork == -1; + + *info = 0; + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -8; + } + } else if (indeig) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -9; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -10; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -15; + } + } + + if (*info == 0) { + if (*n <= 1) { + lwkmin = 1; + work[1].r = (real) lwkmin, work[1].i = 0.f; + } else { + lwkmin = *n << 1; + nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__1, "CUNMTR", uplo, n, &c_n1, &c_n1, + &c_n1, (ftnlen)6, (ftnlen)1); + nb = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = (nb + 1) * *n; + lwkopt = f2cmax(i__1,i__2); + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*lwork < lwkmin && ! lquery) { + *info = -17; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEEVX", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (alleig || indeig) { + *m = 1; + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + } else if (valeig) { + i__1 = a_dim1 + 1; + i__2 = a_dim1 + 1; + if (*vl < a[i__1].r && *vu >= a[i__2].r) { + *m = 1; + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + } + } + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1.f, z__[i__1].i = 0.f; + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + r__1 = sqrt(bignum), r__2 = 1.f / sqrt(sqrt(safmin)); + rmax = f2cmin(r__1,r__2); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + abstll = *abstol; + if (valeig) { + vll = *vl; + vuu = *vu; + } + anrm = clanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j + 1; + csscal_(&i__2, &sigma, &a[j + j * a_dim1], &c__1); +/* L10: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + csscal_(&j, &sigma, &a[j * a_dim1 + 1], &c__1); +/* L20: */ + } + } + if (*abstol > 0.f) { + abstll = *abstol * sigma; + } + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } + +/* Call CHETRD to reduce Hermitian matrix to tridiagonal form. */ + + indd = 1; + inde = indd + *n; + indrwk = inde + *n; + indtau = 1; + indwrk = indtau + *n; + llwork = *lwork - indwrk + 1; + chetrd_(uplo, n, &a[a_offset], lda, &rwork[indd], &rwork[inde], &work[ + indtau], &work[indwrk], &llwork, &iinfo); + +/* If all eigenvalues are desired and ABSTOL is less than or equal to */ +/* zero, then call SSTERF or CUNGTR and CSTEQR. If this fails for */ +/* some eigenvalue, then try SSTEBZ. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && *abstol <= 0.f) { + scopy_(n, &rwork[indd], &c__1, &w[1], &c__1); + indee = indrwk + (*n << 1); + if (! wantz) { + i__1 = *n - 1; + scopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + ssterf_(n, &w[1], &rwork[indee], info); + } else { + clacpy_("A", n, n, &a[a_offset], lda, &z__[z_offset], ldz); + cungtr_(uplo, n, &z__[z_offset], ldz, &work[indtau], &work[indwrk] + , &llwork, &iinfo); + i__1 = *n - 1; + scopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + csteqr_(jobz, n, &w[1], &rwork[indee], &z__[z_offset], ldz, & + rwork[indrwk], info); + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ifail[i__] = 0; +/* L30: */ + } + } + } + if (*info == 0) { + *m = *n; + goto L40; + } + *info = 0; + } + +/* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + indibl = 1; + indisp = indibl + *n; + indiwk = indisp + *n; + sstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &rwork[indd], & + rwork[inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], & + rwork[indrwk], &iwork[indiwk], info); + + if (wantz) { + cstein_(n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], & + iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[ + indiwk], &ifail[1], info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by CSTEIN. */ + + cunmtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau], &z__[ + z_offset], ldz, &work[indwrk], &llwork, &iinfo); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +L40: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L50: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + if (*info != 0) { + itmp1 = ifail[i__]; + ifail[i__] = ifail[j]; + ifail[j] = itmp1; + } + } +/* L60: */ + } + } + +/* Set WORK(1) to optimal complex workspace size. */ + + work[1].r = (real) lwkopt, work[1].i = 0.f; + + return 0; + +/* End of CHEEVX */ + +} /* cheevx_ */ + diff --git a/lapack-netlib/SRC/cheevx_2stage.c b/lapack-netlib/SRC/cheevx_2stage.c new file mode 100644 index 000000000..697201290 --- /dev/null +++ b/lapack-netlib/SRC/cheevx_2stage.c @@ -0,0 +1,1084 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHEEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for + HE matrices */ + +/* @generated from zheevx_2stage.f, fortran z -> c, Sat Nov 5 23:18:09 2016 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHEEVX_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHEEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, */ +/* IL, IU, ABSTOL, M, W, Z, LDZ, WORK, */ +/* LWORK, RWORK, IWORK, IFAIL, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N */ +/* REAL ABSTOL, VL, VU */ +/* INTEGER IFAIL( * ), IWORK( * ) */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHEEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors */ +/* > of a complex Hermitian matrix A using the 2stage technique for */ +/* > the reduction to tridiagonal. Eigenvalues and eigenvectors can */ +/* > be selected by specifying either a range of values or a range of */ +/* > indices for the desired eigenvalues. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > Not available in this release. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all eigenvalues will be found. */ +/* > = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* > will be found. */ +/* > = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, the lower triangle (if UPLO='L') or the upper */ +/* > triangle (if UPLO='U') of A, including the diagonal, is */ +/* > destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is REAL */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is REAL */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > smallest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > largest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ABSTOL */ +/* > \verbatim */ +/* > ABSTOL is REAL */ +/* > The absolute error tolerance for the eigenvalues. */ +/* > An approximate eigenvalue is accepted as converged */ +/* > when it is determined to lie in an interval [a,b] */ +/* > of width less than or equal to */ +/* > */ +/* > ABSTOL + EPS * f2cmax( |a|,|b| ) , */ +/* > */ +/* > where EPS is the machine precision. If ABSTOL is less than */ +/* > or equal to zero, then EPS*|T| will be used in its place, */ +/* > where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* > by reducing A to tridiagonal form. */ +/* > */ +/* > Eigenvalues will be computed most accurately when ABSTOL is */ +/* > set to twice the underflow threshold 2*SLAMCH('S'), not zero. */ +/* > If this routine returns with INFO>0, indicating that some */ +/* > eigenvectors did not converge, try setting ABSTOL to */ +/* > 2*SLAMCH('S'). */ +/* > */ +/* > See "Computing Small Singular Values of Bidiagonal Matrices */ +/* > with Guaranteed High Relative Accuracy," by Demmel and */ +/* > Kahan, LAPACK Working Note #3. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The total number of eigenvalues found. 0 <= M <= N. */ +/* > If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > On normal exit, the first M elements contain the selected */ +/* > eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX array, dimension (LDZ, f2cmax(1,M)) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* > contain the orthonormal eigenvectors of the matrix A */ +/* > corresponding to the selected eigenvalues, with the i-th */ +/* > column of Z holding the eigenvector associated with W(i). */ +/* > If an eigenvector fails to converge, then that column of Z */ +/* > contains the latest approximation to the eigenvector, and the */ +/* > index of the eigenvector is returned in IFAIL. */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > Note: the user must ensure that at least f2cmax(1,M) columns are */ +/* > supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* > is not known in advance and an upper bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1, when N <= 1; */ +/* > otherwise */ +/* > If JOBZ = 'N' and N > 1, LWORK must be queried. */ +/* > LWORK = MAX(1, 8*N, dimension) where */ +/* > dimension = f2cmax(stage1,stage2) + (KD+1)*N + N */ +/* > = N*KD + N*f2cmax(KD+1,FACTOPTNB) */ +/* > + f2cmax(2*KD*KD, KD*NTHREADS) */ +/* > + (KD+1)*N + N */ +/* > where KD is the blocking size of the reduction, */ +/* > FACTOPTNB is the blocking used by the QR or LQ */ +/* > algorithm, usually FACTOPTNB=128 is a good choice */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (7*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (5*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IFAIL */ +/* > \verbatim */ +/* > IFAIL is INTEGER array, dimension (N) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* > IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* > indices of the eigenvectors that failed to converge. */ +/* > If JOBZ = 'N', then IFAIL is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, then i eigenvectors failed to converge. */ +/* > Their indices are stored in array IFAIL. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complexHEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > All details about the 2stage techniques are available in: */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int cheevx_2stage_(char *jobz, char *range, char *uplo, + integer *n, complex *a, integer *lda, real *vl, real *vu, integer *il, + integer *iu, real *abstol, integer *m, real *w, complex *z__, + integer *ldz, complex *work, integer *lwork, real *rwork, integer * + iwork, integer *ifail, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + integer indd, inde; + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + real anrm; + integer imax; + real rmin, rmax; + logical test; + integer itmp1, i__, j; + extern /* Subroutine */ int chetrd_2stage_(char *, char *, integer *, + complex *, integer *, real *, real *, complex *, complex *, + integer *, complex *, integer *, integer *); + integer indee; + real sigma; + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + char order[1]; + integer lhtrd; + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + integer lwmin; + logical lower; + integer lwtrd; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + logical wantz; + integer ib, kd, jj; + extern real clanhe_(char *, char *, integer *, complex *, integer *, real + *); + logical alleig, indeig; + integer iscale, indibl; + logical valeig; + extern real slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), clacpy_(char *, integer *, integer *, complex *, integer *, + complex *, integer *); + real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real abstll, bignum; + integer indiwk, indisp, indtau; + extern /* Subroutine */ int cstein_(integer *, real *, real *, integer *, + real *, integer *, integer *, complex *, integer *, real *, + integer *, integer *, integer *); + integer indrwk, indwrk; + extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, + complex *, integer *, real *, integer *), cungtr_(char *, + integer *, complex *, integer *, complex *, complex *, integer *, + integer *), ssterf_(integer *, real *, real *, integer *), + cunmtr_(char *, char *, char *, integer *, integer *, complex *, + integer *, complex *, complex *, integer *, complex *, integer *, + integer *); + integer nsplit, llwork; + real smlnum; + extern /* Subroutine */ int sstebz_(char *, char *, integer *, real *, + real *, integer *, integer *, real *, real *, real *, integer *, + integer *, real *, integer *, integer *, real *, integer *, + integer *); + logical lquery; + real eps, vll, vuu; + integer indhous; + real tmp1; + + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + --ifail; + + /* Function Body */ + lower = lsame_(uplo, "L"); + wantz = lsame_(jobz, "V"); + alleig = lsame_(range, "A"); + valeig = lsame_(range, "V"); + indeig = lsame_(range, "I"); + lquery = *lwork == -1; + + *info = 0; + if (! lsame_(jobz, "N")) { + *info = -1; + } else if (! (alleig || valeig || indeig)) { + *info = -2; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -8; + } + } else if (indeig) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -9; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -10; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -15; + } + } + + if (*info == 0) { + if (*n <= 1) { + lwmin = 1; + work[1].r = (real) lwmin, work[1].i = 0.f; + } else { + kd = ilaenv2stage_(&c__1, "CHETRD_2STAGE", jobz, n, &c_n1, &c_n1, + &c_n1); + ib = ilaenv2stage_(&c__2, "CHETRD_2STAGE", jobz, n, &kd, &c_n1, & + c_n1); + lhtrd = ilaenv2stage_(&c__3, "CHETRD_2STAGE", jobz, n, &kd, &ib, & + c_n1); + lwtrd = ilaenv2stage_(&c__4, "CHETRD_2STAGE", jobz, n, &kd, &ib, & + c_n1); + lwmin = *n + lhtrd + lwtrd; + work[1].r = (real) lwmin, work[1].i = 0.f; + } + + if (*lwork < lwmin && ! lquery) { + *info = -17; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEEVX_2STAGE", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (alleig || indeig) { + *m = 1; + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + } else if (valeig) { + i__1 = a_dim1 + 1; + i__2 = a_dim1 + 1; + if (*vl < a[i__1].r && *vu >= a[i__2].r) { + *m = 1; + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + } + } + if (wantz) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1.f, z__[i__1].i = 0.f; + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); +/* Computing MIN */ + r__1 = sqrt(bignum), r__2 = 1.f / sqrt(sqrt(safmin)); + rmax = f2cmin(r__1,r__2); + +/* Scale matrix to allowable range, if necessary. */ + + iscale = 0; + abstll = *abstol; + if (valeig) { + vll = *vl; + vuu = *vu; + } + anrm = clanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + if (lower) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j + 1; + csscal_(&i__2, &sigma, &a[j + j * a_dim1], &c__1); +/* L10: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + csscal_(&j, &sigma, &a[j * a_dim1 + 1], &c__1); +/* L20: */ + } + } + if (*abstol > 0.f) { + abstll = *abstol * sigma; + } + if (valeig) { + vll = *vl * sigma; + vuu = *vu * sigma; + } + } + +/* Call CHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. */ + + indd = 1; + inde = indd + *n; + indrwk = inde + *n; + indtau = 1; + indhous = indtau + *n; + indwrk = indhous + lhtrd; + llwork = *lwork - indwrk + 1; + + chetrd_2stage_(jobz, uplo, n, &a[a_offset], lda, &rwork[indd], &rwork[ + inde], &work[indtau], &work[indhous], &lhtrd, &work[indwrk], & + llwork, &iinfo); + +/* If all eigenvalues are desired and ABSTOL is less than or equal to */ +/* zero, then call SSTERF or CUNGTR and CSTEQR. If this fails for */ +/* some eigenvalue, then try SSTEBZ. */ + + test = FALSE_; + if (indeig) { + if (*il == 1 && *iu == *n) { + test = TRUE_; + } + } + if ((alleig || test) && *abstol <= 0.f) { + scopy_(n, &rwork[indd], &c__1, &w[1], &c__1); + indee = indrwk + (*n << 1); + if (! wantz) { + i__1 = *n - 1; + scopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + ssterf_(n, &w[1], &rwork[indee], info); + } else { + clacpy_("A", n, n, &a[a_offset], lda, &z__[z_offset], ldz); + cungtr_(uplo, n, &z__[z_offset], ldz, &work[indtau], &work[indwrk] + , &llwork, &iinfo); + i__1 = *n - 1; + scopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); + csteqr_(jobz, n, &w[1], &rwork[indee], &z__[z_offset], ldz, & + rwork[indrwk], info); + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ifail[i__] = 0; +/* L30: */ + } + } + } + if (*info == 0) { + *m = *n; + goto L40; + } + *info = 0; + } + +/* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. */ + + if (wantz) { + *(unsigned char *)order = 'B'; + } else { + *(unsigned char *)order = 'E'; + } + indibl = 1; + indisp = indibl + *n; + indiwk = indisp + *n; + sstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &rwork[indd], & + rwork[inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], & + rwork[indrwk], &iwork[indiwk], info); + + if (wantz) { + cstein_(n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], & + iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[ + indiwk], &ifail[1], info); + +/* Apply unitary matrix used in reduction to tridiagonal */ +/* form to eigenvectors returned by CSTEIN. */ + + cunmtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau], &z__[ + z_offset], ldz, &work[indwrk], &llwork, &iinfo); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + +L40: + if (iscale == 1) { + if (*info == 0) { + imax = *m; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } + +/* If eigenvalues are not in order, then sort them, along with */ +/* eigenvectors. */ + + if (wantz) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + i__ = 0; + tmp1 = w[j]; + i__2 = *m; + for (jj = j + 1; jj <= i__2; ++jj) { + if (w[jj] < tmp1) { + i__ = jj; + tmp1 = w[jj]; + } +/* L50: */ + } + + if (i__ != 0) { + itmp1 = iwork[indibl + i__ - 1]; + w[i__] = w[j]; + iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; + w[j] = tmp1; + iwork[indibl + j - 1] = itmp1; + cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], + &c__1); + if (*info != 0) { + itmp1 = ifail[i__]; + ifail[i__] = ifail[j]; + ifail[j] = itmp1; + } + } +/* L60: */ + } + } + +/* Set WORK(1) to optimal complex workspace size. */ + + work[1].r = (real) lwmin, work[1].i = 0.f; + + return 0; + +/* End of CHEEVX_2STAGE */ + +} /* cheevx_2stage__ */ + diff --git a/lapack-netlib/SRC/chegs2.c b/lapack-netlib/SRC/chegs2.c new file mode 100644 index 000000000..0888852e0 --- /dev/null +++ b/lapack-netlib/SRC/chegs2.c @@ -0,0 +1,767 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHEGS2 reduces a Hermitian definite generalized eigenproblem to standard form, using the factor +ization results obtained from cpotrf (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHEGS2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, ITYPE, LDA, LDB, N */ +/* COMPLEX A( LDA, * ), B( LDB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHEGS2 reduces a complex Hermitian-definite generalized */ +/* > eigenproblem to standard form. */ +/* > */ +/* > If ITYPE = 1, the problem is A*x = lambda*B*x, */ +/* > and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) */ +/* > */ +/* > If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */ +/* > B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H *A*L. */ +/* > */ +/* > B must have been previously factorized as U**H *U or L*L**H by ZPOTRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITYPE */ +/* > \verbatim */ +/* > ITYPE is INTEGER */ +/* > = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); */ +/* > = 2 or 3: compute U*A*U**H or L**H *A*L. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > Hermitian matrix A is stored, and how B has been factorized. */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > n by n upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading n by n lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if INFO = 0, the transformed matrix, stored in the */ +/* > same format as A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,N) */ +/* > The triangular factor from the Cholesky factorization of B, */ +/* > as returned by CPOTRF. */ +/* > B is modified by the routine but restored on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexHEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int chegs2_(integer *itype, char *uplo, integer *n, complex * + a, integer *lda, complex *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + real r__1, r__2; + complex q__1; + + /* Local variables */ + extern /* Subroutine */ int cher2_(char *, integer *, complex *, complex * + , integer *, complex *, integer *, complex *, integer *); + integer k; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *); + logical upper; + extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *, + complex *, integer *, complex *, integer *), ctrsv_(char *, char *, char *, integer *, complex *, + integer *, complex *, integer *); + complex ct; + extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), + csscal_(integer *, real *, complex *, integer *), xerbla_(char *, + integer *, ftnlen); + real akk, bkk; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEGS2", &i__1, (ftnlen)6); + return 0; + } + + if (*itype == 1) { + if (upper) { + +/* Compute inv(U**H)*A*inv(U) */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Update the upper triangle of A(k:n,k:n) */ + + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; +/* Computing 2nd power */ + r__1 = bkk; + akk /= r__1 * r__1; + i__2 = k + k * a_dim1; + a[i__2].r = akk, a[i__2].i = 0.f; + if (k < *n) { + i__2 = *n - k; + r__1 = 1.f / bkk; + csscal_(&i__2, &r__1, &a[k + (k + 1) * a_dim1], lda); + r__1 = akk * -.5f; + ct.r = r__1, ct.i = 0.f; + i__2 = *n - k; + clacgv_(&i__2, &a[k + (k + 1) * a_dim1], lda); + i__2 = *n - k; + clacgv_(&i__2, &b[k + (k + 1) * b_dim1], ldb); + i__2 = *n - k; + caxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + ( + k + 1) * a_dim1], lda); + i__2 = *n - k; + q__1.r = -1.f, q__1.i = 0.f; + cher2_(uplo, &i__2, &q__1, &a[k + (k + 1) * a_dim1], lda, + &b[k + (k + 1) * b_dim1], ldb, &a[k + 1 + (k + 1) + * a_dim1], lda); + i__2 = *n - k; + caxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + ( + k + 1) * a_dim1], lda); + i__2 = *n - k; + clacgv_(&i__2, &b[k + (k + 1) * b_dim1], ldb); + i__2 = *n - k; + ctrsv_(uplo, "Conjugate transpose", "Non-unit", &i__2, &b[ + k + 1 + (k + 1) * b_dim1], ldb, &a[k + (k + 1) * + a_dim1], lda); + i__2 = *n - k; + clacgv_(&i__2, &a[k + (k + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute inv(L)*A*inv(L**H) */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Update the lower triangle of A(k:n,k:n) */ + + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; +/* Computing 2nd power */ + r__1 = bkk; + akk /= r__1 * r__1; + i__2 = k + k * a_dim1; + a[i__2].r = akk, a[i__2].i = 0.f; + if (k < *n) { + i__2 = *n - k; + r__1 = 1.f / bkk; + csscal_(&i__2, &r__1, &a[k + 1 + k * a_dim1], &c__1); + r__1 = akk * -.5f; + ct.r = r__1, ct.i = 0.f; + i__2 = *n - k; + caxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + + 1 + k * a_dim1], &c__1); + i__2 = *n - k; + q__1.r = -1.f, q__1.i = 0.f; + cher2_(uplo, &i__2, &q__1, &a[k + 1 + k * a_dim1], &c__1, + &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + (k + 1) + * a_dim1], lda); + i__2 = *n - k; + caxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + + 1 + k * a_dim1], &c__1); + i__2 = *n - k; + ctrsv_(uplo, "No transpose", "Non-unit", &i__2, &b[k + 1 + + (k + 1) * b_dim1], ldb, &a[k + 1 + k * a_dim1], + &c__1); + } +/* L20: */ + } + } + } else { + if (upper) { + +/* Compute U*A*U**H */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Update the upper triangle of A(1:k,1:k) */ + + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; + i__2 = k - 1; + ctrmv_(uplo, "No transpose", "Non-unit", &i__2, &b[b_offset], + ldb, &a[k * a_dim1 + 1], &c__1); + r__1 = akk * .5f; + ct.r = r__1, ct.i = 0.f; + i__2 = k - 1; + caxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + + 1], &c__1); + i__2 = k - 1; + cher2_(uplo, &i__2, &c_b1, &a[k * a_dim1 + 1], &c__1, &b[k * + b_dim1 + 1], &c__1, &a[a_offset], lda); + i__2 = k - 1; + caxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + + 1], &c__1); + i__2 = k - 1; + csscal_(&i__2, &bkk, &a[k * a_dim1 + 1], &c__1); + i__2 = k + k * a_dim1; +/* Computing 2nd power */ + r__2 = bkk; + r__1 = akk * (r__2 * r__2); + a[i__2].r = r__1, a[i__2].i = 0.f; +/* L30: */ + } + } else { + +/* Compute L**H *A*L */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Update the lower triangle of A(1:k,1:k) */ + + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; + i__2 = k - 1; + clacgv_(&i__2, &a[k + a_dim1], lda); + i__2 = k - 1; + ctrmv_(uplo, "Conjugate transpose", "Non-unit", &i__2, &b[ + b_offset], ldb, &a[k + a_dim1], lda); + r__1 = akk * .5f; + ct.r = r__1, ct.i = 0.f; + i__2 = k - 1; + clacgv_(&i__2, &b[k + b_dim1], ldb); + i__2 = k - 1; + caxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda); + i__2 = k - 1; + cher2_(uplo, &i__2, &c_b1, &a[k + a_dim1], lda, &b[k + b_dim1] + , ldb, &a[a_offset], lda); + i__2 = k - 1; + caxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda); + i__2 = k - 1; + clacgv_(&i__2, &b[k + b_dim1], ldb); + i__2 = k - 1; + csscal_(&i__2, &bkk, &a[k + a_dim1], lda); + i__2 = k - 1; + clacgv_(&i__2, &a[k + a_dim1], lda); + i__2 = k + k * a_dim1; +/* Computing 2nd power */ + r__2 = bkk; + r__1 = akk * (r__2 * r__2); + a[i__2].r = r__1, a[i__2].i = 0.f; +/* L40: */ + } + } + } + return 0; + +/* End of CHEGS2 */ + +} /* chegs2_ */ + diff --git a/lapack-netlib/SRC/chegst.c b/lapack-netlib/SRC/chegst.c new file mode 100644 index 000000000..d6eae6da6 --- /dev/null +++ b/lapack-netlib/SRC/chegst.c @@ -0,0 +1,784 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHEGST */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHEGST + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, ITYPE, LDA, LDB, N */ +/* COMPLEX A( LDA, * ), B( LDB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHEGST reduces a complex Hermitian-definite generalized */ +/* > eigenproblem to standard form. */ +/* > */ +/* > If ITYPE = 1, the problem is A*x = lambda*B*x, */ +/* > and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) */ +/* > */ +/* > If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */ +/* > B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. */ +/* > */ +/* > B must have been previously factorized as U**H*U or L*L**H by CPOTRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITYPE */ +/* > \verbatim */ +/* > ITYPE is INTEGER */ +/* > = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); */ +/* > = 2 or 3: compute U*A*U**H or L**H*A*L. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored and B is factored as */ +/* > U**H*U; */ +/* > = 'L': Lower triangle of A is stored and B is factored as */ +/* > L*L**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if INFO = 0, the transformed matrix, stored in the */ +/* > same format as A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,N) */ +/* > The triangular factor from the Cholesky factorization of B, */ +/* > as returned by CPOTRF. */ +/* > B is modified by the routine but restored on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexHEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int chegst_(integer *itype, char *uplo, integer *n, complex * + a, integer *lda, complex *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + complex q__1; + + /* Local variables */ + integer k; + extern /* Subroutine */ int chemm_(char *, char *, integer *, integer *, + complex *, complex *, integer *, complex *, integer *, complex *, + complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), ctrsm_(char *, char *, + char *, char *, integer *, integer *, complex *, complex *, + integer *, complex *, integer *); + logical upper; + extern /* Subroutine */ int chegs2_(integer *, char *, integer *, complex + *, integer *, complex *, integer *, integer *), cher2k_( + char *, char *, integer *, integer *, complex *, complex *, + integer *, complex *, integer *, real *, complex *, integer *); + integer kb, nb; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEGST", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "CHEGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + chegs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); + } else { + +/* Use blocked code */ + + if (*itype == 1) { + if (upper) { + +/* Compute inv(U**H)*A*inv(U) */ + + i__1 = *n; + i__2 = nb; + for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { +/* Computing MIN */ + i__3 = *n - k + 1; + kb = f2cmin(i__3,nb); + +/* Update the upper triangle of A(k:n,k:n) */ + + chegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + + k * b_dim1], ldb, info); + if (k + kb <= *n) { + i__3 = *n - k - kb + 1; + ctrsm_("Left", uplo, "Conjugate transpose", "Non-unit" + , &kb, &i__3, &c_b1, &b[k + k * b_dim1], ldb, + &a[k + (k + kb) * a_dim1], lda); + i__3 = *n - k - kb + 1; + q__1.r = -.5f, q__1.i = 0.f; + chemm_("Left", uplo, &kb, &i__3, &q__1, &a[k + k * + a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, + &c_b1, &a[k + (k + kb) * a_dim1], lda); + i__3 = *n - k - kb + 1; + q__1.r = -1.f, q__1.i = 0.f; + cher2k_(uplo, "Conjugate transpose", &i__3, &kb, & + q__1, &a[k + (k + kb) * a_dim1], lda, &b[k + ( + k + kb) * b_dim1], ldb, &c_b18, &a[k + kb + ( + k + kb) * a_dim1], lda) + ; + i__3 = *n - k - kb + 1; + q__1.r = -.5f, q__1.i = 0.f; + chemm_("Left", uplo, &kb, &i__3, &q__1, &a[k + k * + a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, + &c_b1, &a[k + (k + kb) * a_dim1], lda); + i__3 = *n - k - kb + 1; + ctrsm_("Right", uplo, "No transpose", "Non-unit", &kb, + &i__3, &c_b1, &b[k + kb + (k + kb) * b_dim1], + ldb, &a[k + (k + kb) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute inv(L)*A*inv(L**H) */ + + i__2 = *n; + i__1 = nb; + for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { +/* Computing MIN */ + i__3 = *n - k + 1; + kb = f2cmin(i__3,nb); + +/* Update the lower triangle of A(k:n,k:n) */ + + chegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + + k * b_dim1], ldb, info); + if (k + kb <= *n) { + i__3 = *n - k - kb + 1; + ctrsm_("Right", uplo, "Conjugate transpose", "Non-un" + "it", &i__3, &kb, &c_b1, &b[k + k * b_dim1], + ldb, &a[k + kb + k * a_dim1], lda); + i__3 = *n - k - kb + 1; + q__1.r = -.5f, q__1.i = 0.f; + chemm_("Right", uplo, &i__3, &kb, &q__1, &a[k + k * + a_dim1], lda, &b[k + kb + k * b_dim1], ldb, & + c_b1, &a[k + kb + k * a_dim1], lda); + i__3 = *n - k - kb + 1; + q__1.r = -1.f, q__1.i = 0.f; + cher2k_(uplo, "No transpose", &i__3, &kb, &q__1, &a[k + + kb + k * a_dim1], lda, &b[k + kb + k * + b_dim1], ldb, &c_b18, &a[k + kb + (k + kb) * + a_dim1], lda); + i__3 = *n - k - kb + 1; + q__1.r = -.5f, q__1.i = 0.f; + chemm_("Right", uplo, &i__3, &kb, &q__1, &a[k + k * + a_dim1], lda, &b[k + kb + k * b_dim1], ldb, & + c_b1, &a[k + kb + k * a_dim1], lda); + i__3 = *n - k - kb + 1; + ctrsm_("Left", uplo, "No transpose", "Non-unit", & + i__3, &kb, &c_b1, &b[k + kb + (k + kb) * + b_dim1], ldb, &a[k + kb + k * a_dim1], lda); + } +/* L20: */ + } + } + } else { + if (upper) { + +/* Compute U*A*U**H */ + + i__1 = *n; + i__2 = nb; + for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { +/* Computing MIN */ + i__3 = *n - k + 1; + kb = f2cmin(i__3,nb); + +/* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */ + + i__3 = k - 1; + ctrmm_("Left", uplo, "No transpose", "Non-unit", &i__3, & + kb, &c_b1, &b[b_offset], ldb, &a[k * a_dim1 + 1], + lda); + i__3 = k - 1; + chemm_("Right", uplo, &i__3, &kb, &c_b2, &a[k + k * + a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b1, &a[ + k * a_dim1 + 1], lda); + i__3 = k - 1; + cher2k_(uplo, "No transpose", &i__3, &kb, &c_b1, &a[k * + a_dim1 + 1], lda, &b[k * b_dim1 + 1], ldb, &c_b18, + &a[a_offset], lda); + i__3 = k - 1; + chemm_("Right", uplo, &i__3, &kb, &c_b2, &a[k + k * + a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b1, &a[ + k * a_dim1 + 1], lda); + i__3 = k - 1; + ctrmm_("Right", uplo, "Conjugate transpose", "Non-unit", & + i__3, &kb, &c_b1, &b[k + k * b_dim1], ldb, &a[k * + a_dim1 + 1], lda); + chegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + + k * b_dim1], ldb, info); +/* L30: */ + } + } else { + +/* Compute L**H*A*L */ + + i__2 = *n; + i__1 = nb; + for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { +/* Computing MIN */ + i__3 = *n - k + 1; + kb = f2cmin(i__3,nb); + +/* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */ + + i__3 = k - 1; + ctrmm_("Right", uplo, "No transpose", "Non-unit", &kb, & + i__3, &c_b1, &b[b_offset], ldb, &a[k + a_dim1], + lda); + i__3 = k - 1; + chemm_("Left", uplo, &kb, &i__3, &c_b2, &a[k + k * a_dim1] + , lda, &b[k + b_dim1], ldb, &c_b1, &a[k + a_dim1], + lda); + i__3 = k - 1; + cher2k_(uplo, "Conjugate transpose", &i__3, &kb, &c_b1, & + a[k + a_dim1], lda, &b[k + b_dim1], ldb, &c_b18, & + a[a_offset], lda); + i__3 = k - 1; + chemm_("Left", uplo, &kb, &i__3, &c_b2, &a[k + k * a_dim1] + , lda, &b[k + b_dim1], ldb, &c_b1, &a[k + a_dim1], + lda); + i__3 = k - 1; + ctrmm_("Left", uplo, "Conjugate transpose", "Non-unit", & + kb, &i__3, &c_b1, &b[k + k * b_dim1], ldb, &a[k + + a_dim1], lda); + chegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + + k * b_dim1], ldb, info); +/* L40: */ + } + } + } + } + return 0; + +/* End of CHEGST */ + +} /* chegst_ */ + diff --git a/lapack-netlib/SRC/chegv.c b/lapack-netlib/SRC/chegv.c new file mode 100644 index 000000000..ed0b11804 --- /dev/null +++ b/lapack-netlib/SRC/chegv.c @@ -0,0 +1,735 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHEGV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHEGV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHEGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, */ +/* LWORK, RWORK, INFO ) */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHEGV computes all the eigenvalues, and optionally, the eigenvectors */ +/* > of a complex generalized Hermitian-definite eigenproblem, of the form */ +/* > A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. */ +/* > Here A and B are assumed to be Hermitian and B is also */ +/* > positive definite. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITYPE */ +/* > \verbatim */ +/* > ITYPE is INTEGER */ +/* > Specifies the problem type to be solved: */ +/* > = 1: A*x = (lambda)*B*x */ +/* > = 2: A*B*x = (lambda)*x */ +/* > = 3: B*A*x = (lambda)*x */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangles of A and B are stored; */ +/* > = 'L': Lower triangles of A and B are stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > matrix Z of eigenvectors. The eigenvectors are normalized */ +/* > as follows: */ +/* > if ITYPE = 1 or 2, Z**H*B*Z = I; */ +/* > if ITYPE = 3, Z**H*inv(B)*Z = I. */ +/* > If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */ +/* > or the lower triangle (if UPLO='L') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB, N) */ +/* > On entry, the Hermitian positive definite matrix B. */ +/* > If UPLO = 'U', the leading N-by-N upper triangular part of B */ +/* > contains the upper triangular part of the matrix B. */ +/* > If UPLO = 'L', the leading N-by-N lower triangular part of B */ +/* > contains the lower triangular part of the matrix B. */ +/* > */ +/* > On exit, if INFO <= N, the part of B containing the matrix is */ +/* > overwritten by the triangular factor U or L from the Cholesky */ +/* > factorization B = U**H*U or B = L*L**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= f2cmax(1,2*N-1). */ +/* > For optimal efficiency, LWORK >= (NB+1)*N, */ +/* > where NB is the blocksize for CHETRD returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (f2cmax(1, 3*N-2)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: CPOTRF or CHEEV returned an error code: */ +/* > <= N: if INFO = i, CHEEV failed to converge; */ +/* > i off-diagonal elements of an intermediate */ +/* > tridiagonal form did not converge to zero; */ +/* > > N: if INFO = N + i, for 1 <= i <= N, then the leading */ +/* > minor of order i of B is not positive definite. */ +/* > The factorization of B could not be completed and */ +/* > no eigenvalues or eigenvectors were computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexHEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int chegv_(integer *itype, char *jobz, char *uplo, integer * + n, complex *a, integer *lda, complex *b, integer *ldb, real *w, + complex *work, integer *lwork, real *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + integer neig; + extern /* Subroutine */ int cheev_(char *, char *, integer *, complex *, + integer *, real *, complex *, integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + char trans[1]; + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + logical upper, wantz; + integer nb; + extern /* Subroutine */ int chegst_(integer *, char *, integer *, complex + *, integer *, complex *, integer *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), cpotrf_( + char *, integer *, complex *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --w; + --work; + --rwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + + *info = 0; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! (wantz || lsame_(jobz, "N"))) { + *info = -2; + } else if (! (upper || lsame_(uplo, "L"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); +/* Computing MAX */ + i__1 = 1, i__2 = (nb + 1) * *n; + lwkopt = f2cmax(i__1,i__2); + work[1].r = (real) lwkopt, work[1].i = 0.f; + +/* Computing MAX */ + i__1 = 1, i__2 = (*n << 1) - 1; + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -11; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEGV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a Cholesky factorization of B. */ + + cpotrf_(uplo, n, &b[b_offset], ldb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem and solve. */ + + chegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); + cheev_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &rwork[1] + , info); + + if (wantz) { + +/* Backtransform eigenvectors to the original problem. */ + + neig = *n; + if (*info > 0) { + neig = *info - 1; + } + if (*itype == 1 || *itype == 2) { + +/* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ +/* backtransform eigenvectors: x = inv(L)**H*y or inv(U)*y */ + + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'C'; + } + + ctrsm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b1, &b[ + b_offset], ldb, &a[a_offset], lda); + + } else if (*itype == 3) { + +/* For B*A*x=(lambda)*x; */ +/* backtransform eigenvectors: x = L*y or U**H*y */ + + if (upper) { + *(unsigned char *)trans = 'C'; + } else { + *(unsigned char *)trans = 'N'; + } + + ctrmm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b1, &b[ + b_offset], ldb, &a[a_offset], lda); + } + } + + work[1].r = (real) lwkopt, work[1].i = 0.f; + + return 0; + +/* End of CHEGV */ + +} /* chegv_ */ + diff --git a/lapack-netlib/SRC/chegv_2stage.c b/lapack-netlib/SRC/chegv_2stage.c new file mode 100644 index 000000000..0b542267a --- /dev/null +++ b/lapack-netlib/SRC/chegv_2stage.c @@ -0,0 +1,795 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHEGV_2STAGE */ + +/* @generated from zhegv_2stage.f, fortran z -> c, Sun Nov 6 13:09:52 2016 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHEGV_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, */ +/* WORK, LWORK, RWORK, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHEGV_2STAGE computes all the eigenvalues, and optionally, the eigenvectors */ +/* > of a complex generalized Hermitian-definite eigenproblem, of the form */ +/* > A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. */ +/* > Here A and B are assumed to be Hermitian and B is also */ +/* > positive definite. */ +/* > This routine use the 2stage technique for the reduction to tridiagonal */ +/* > which showed higher performance on recent architecture and for large */ +/* > sizes N>2000. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITYPE */ +/* > \verbatim */ +/* > ITYPE is INTEGER */ +/* > Specifies the problem type to be solved: */ +/* > = 1: A*x = (lambda)*B*x */ +/* > = 2: A*B*x = (lambda)*x */ +/* > = 3: B*A*x = (lambda)*x */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > Not available in this release. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangles of A and B are stored; */ +/* > = 'L': Lower triangles of A and B are stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > matrix Z of eigenvectors. The eigenvectors are normalized */ +/* > as follows: */ +/* > if ITYPE = 1 or 2, Z**H*B*Z = I; */ +/* > if ITYPE = 3, Z**H*inv(B)*Z = I. */ +/* > If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */ +/* > or the lower triangle (if UPLO='L') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB, N) */ +/* > On entry, the Hermitian positive definite matrix B. */ +/* > If UPLO = 'U', the leading N-by-N upper triangular part of B */ +/* > contains the upper triangular part of the matrix B. */ +/* > If UPLO = 'L', the leading N-by-N lower triangular part of B */ +/* > contains the lower triangular part of the matrix B. */ +/* > */ +/* > On exit, if INFO <= N, the part of B containing the matrix is */ +/* > overwritten by the triangular factor U or L from the Cholesky */ +/* > factorization B = U**H*U or B = L*L**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1, when N <= 1; */ +/* > otherwise */ +/* > If JOBZ = 'N' and N > 1, LWORK must be queried. */ +/* > LWORK = MAX(1, dimension) where */ +/* > dimension = f2cmax(stage1,stage2) + (KD+1)*N + N */ +/* > = N*KD + N*f2cmax(KD+1,FACTOPTNB) */ +/* > + f2cmax(2*KD*KD, KD*NTHREADS) */ +/* > + (KD+1)*N + N */ +/* > where KD is the blocking size of the reduction, */ +/* > FACTOPTNB is the blocking used by the QR or LQ */ +/* > algorithm, usually FACTOPTNB=128 is a good choice */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (f2cmax(1, 3*N-2)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: CPOTRF or CHEEV returned an error code: */ +/* > <= N: if INFO = i, CHEEV failed to converge; */ +/* > i off-diagonal elements of an intermediate */ +/* > tridiagonal form did not converge to zero; */ +/* > > N: if INFO = N + i, for 1 <= i <= N, then the leading */ +/* > minor of order i of B is not positive definite. */ +/* > The factorization of B could not be completed and */ +/* > no eigenvalues or eigenvectors were computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complexHEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > All details about the 2stage techniques are available in: */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int chegv_2stage_(integer *itype, char *jobz, char *uplo, + integer *n, complex *a, integer *lda, complex *b, integer *ldb, real * + w, complex *work, integer *lwork, real *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + integer neig; + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + extern logical lsame_(char *, char *); + integer lhtrd; + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + integer lwmin; + char trans[1]; + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + logical upper; + integer lwtrd; + logical wantz; + integer ib, kd; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), chegst_( + integer *, char *, integer *, complex *, integer *, complex *, + integer *, integer *), cpotrf_(char *, integer *, complex + *, integer *, integer *); + logical lquery; + extern /* Subroutine */ int cheev_2stage_(char *, char *, integer *, + complex *, integer *, real *, complex *, integer *, real *, + integer *); + + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --w; + --work; + --rwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + + *info = 0; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! lsame_(jobz, "N")) { + *info = -2; + } else if (! (upper || lsame_(uplo, "L"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } + + if (*info == 0) { + kd = ilaenv2stage_(&c__1, "CHETRD_2STAGE", jobz, n, &c_n1, &c_n1, & + c_n1); + ib = ilaenv2stage_(&c__2, "CHETRD_2STAGE", jobz, n, &kd, &c_n1, &c_n1); + lhtrd = ilaenv2stage_(&c__3, "CHETRD_2STAGE", jobz, n, &kd, &ib, & + c_n1); + lwtrd = ilaenv2stage_(&c__4, "CHETRD_2STAGE", jobz, n, &kd, &ib, & + c_n1); + lwmin = *n + lhtrd + lwtrd; + work[1].r = (real) lwmin, work[1].i = 0.f; + + if (*lwork < lwmin && ! lquery) { + *info = -11; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEGV_2STAGE ", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a Cholesky factorization of B. */ + + cpotrf_(uplo, n, &b[b_offset], ldb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem and solve. */ + + chegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); + cheev_2stage_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, & + rwork[1], info); + + if (wantz) { + +/* Backtransform eigenvectors to the original problem. */ + + neig = *n; + if (*info > 0) { + neig = *info - 1; + } + if (*itype == 1 || *itype == 2) { + +/* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ +/* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y */ + + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'C'; + } + + ctrsm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b1, &b[ + b_offset], ldb, &a[a_offset], lda); + + } else if (*itype == 3) { + +/* For B*A*x=(lambda)*x; */ +/* backtransform eigenvectors: x = L*y or U**H *y */ + + if (upper) { + *(unsigned char *)trans = 'C'; + } else { + *(unsigned char *)trans = 'N'; + } + + ctrmm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b1, &b[ + b_offset], ldb, &a[a_offset], lda); + } + } + + work[1].r = (real) lwmin, work[1].i = 0.f; + + return 0; + +/* End of CHEGV_2STAGE */ + +} /* chegv_2stage__ */ + diff --git a/lapack-netlib/SRC/chegvd.c b/lapack-netlib/SRC/chegvd.c new file mode 100644 index 000000000..cd5593016 --- /dev/null +++ b/lapack-netlib/SRC/chegvd.c @@ -0,0 +1,827 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHEGVD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHEGVD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, */ +/* LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) */ + +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LRWORK, LWORK, N */ +/* INTEGER IWORK( * ) */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHEGVD computes all the eigenvalues, and optionally, the eigenvectors */ +/* > of a complex generalized Hermitian-definite eigenproblem, of the form */ +/* > A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and */ +/* > B are assumed to be Hermitian and B is also positive definite. */ +/* > If eigenvectors are desired, it uses a divide and conquer algorithm. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITYPE */ +/* > \verbatim */ +/* > ITYPE is INTEGER */ +/* > Specifies the problem type to be solved: */ +/* > = 1: A*x = (lambda)*B*x */ +/* > = 2: A*B*x = (lambda)*x */ +/* > = 3: B*A*x = (lambda)*x */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangles of A and B are stored; */ +/* > = 'L': Lower triangles of A and B are stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > matrix Z of eigenvectors. The eigenvectors are normalized */ +/* > as follows: */ +/* > if ITYPE = 1 or 2, Z**H*B*Z = I; */ +/* > if ITYPE = 3, Z**H*inv(B)*Z = I. */ +/* > If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */ +/* > or the lower triangle (if UPLO='L') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB, N) */ +/* > On entry, the Hermitian matrix B. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of B contains the */ +/* > upper triangular part of the matrix B. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of B contains */ +/* > the lower triangular part of the matrix B. */ +/* > */ +/* > On exit, if INFO <= N, the part of B containing the matrix is */ +/* > overwritten by the triangular factor U or L from the Cholesky */ +/* > factorization B = U**H*U or B = L*L**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. */ +/* > If N <= 1, LWORK >= 1. */ +/* > If JOBZ = 'N' and N > 1, LWORK >= N + 1. */ +/* > If JOBZ = 'V' and N > 1, LWORK >= 2*N + N**2. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (MAX(1,LRWORK)) */ +/* > On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The dimension of the array RWORK. */ +/* > If N <= 1, LRWORK >= 1. */ +/* > If JOBZ = 'N' and N > 1, LRWORK >= N. */ +/* > If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > If N <= 1, LIWORK >= 1. */ +/* > If JOBZ = 'N' and N > 1, LIWORK >= 1. */ +/* > If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: CPOTRF or CHEEVD returned an error code: */ +/* > <= N: if INFO = i and JOBZ = 'N', then the algorithm */ +/* > failed to converge; i off-diagonal elements of an */ +/* > intermediate tridiagonal form did not converge to */ +/* > zero; */ +/* > if INFO = i and JOBZ = 'V', then the algorithm */ +/* > failed to compute an eigenvalue while working on */ +/* > the submatrix lying in rows and columns INFO/(N+1) */ +/* > through mod(INFO,N+1); */ +/* > > N: if INFO = N + i, for 1 <= i <= N, then the leading */ +/* > minor of order i of B is not positive definite. */ +/* > The factorization of B could not be completed and */ +/* > no eigenvalues or eigenvectors were computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexHEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Modified so that no backsubstitution is performed if CHEEVD fails to */ +/* > converge (NEIG in old code could be greater than N causing out of */ +/* > bounds reference to A - reported by Ralf Meyer). Also corrected the */ +/* > description of INFO and the test on ITYPE. Sven, 16 Feb 05. */ +/* > \endverbatim */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int chegvd_(integer *itype, char *jobz, char *uplo, integer * + n, complex *a, integer *lda, complex *b, integer *ldb, real *w, + complex *work, integer *lwork, real *rwork, integer *lrwork, integer * + iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + real r__1, r__2; + + /* Local variables */ + integer lopt; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + integer lwmin; + char trans[1]; + integer liopt; + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + logical upper; + integer lropt; + logical wantz; + extern /* Subroutine */ int cheevd_(char *, char *, integer *, complex *, + integer *, real *, complex *, integer *, real *, integer *, + integer *, integer *, integer *), chegst_(integer + *, char *, integer *, complex *, integer *, complex *, integer *, + integer *), xerbla_(char *, integer *, ftnlen), cpotrf_( + char *, integer *, complex *, integer *, integer *); + integer liwmin, lrwmin; + logical lquery; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --w; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + *info = 0; + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + } else if (wantz) { + lwmin = (*n << 1) + *n * *n; + lrwmin = *n * 5 + 1 + (*n << 1) * *n; + liwmin = *n * 5 + 3; + } else { + lwmin = *n + 1; + lrwmin = *n; + liwmin = 1; + } + lopt = lwmin; + lropt = lrwmin; + liopt = liwmin; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! (wantz || lsame_(jobz, "N"))) { + *info = -2; + } else if (! (upper || lsame_(uplo, "L"))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } + + if (*info == 0) { + work[1].r = (real) lopt, work[1].i = 0.f; + rwork[1] = (real) lropt; + iwork[1] = liopt; + + if (*lwork < lwmin && ! lquery) { + *info = -11; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -13; + } else if (*liwork < liwmin && ! lquery) { + *info = -15; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEGVD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a Cholesky factorization of B. */ + + cpotrf_(uplo, n, &b[b_offset], ldb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem and solve. */ + + chegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); + cheevd_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &rwork[ + 1], lrwork, &iwork[1], liwork, info); +/* Computing MAX */ + r__1 = (real) lopt, r__2 = work[1].r; + lopt = f2cmax(r__1,r__2); +/* Computing MAX */ + r__1 = (real) lropt; + lropt = f2cmax(r__1,rwork[1]); +/* Computing MAX */ + r__1 = (real) liopt, r__2 = (real) iwork[1]; + liopt = f2cmax(r__1,r__2); + + if (wantz && *info == 0) { + +/* Backtransform eigenvectors to the original problem. */ + + if (*itype == 1 || *itype == 2) { + +/* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ +/* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y */ + + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'C'; + } + + ctrsm_("Left", uplo, trans, "Non-unit", n, n, &c_b1, &b[b_offset], + ldb, &a[a_offset], lda); + + } else if (*itype == 3) { + +/* For B*A*x=(lambda)*x; */ +/* backtransform eigenvectors: x = L*y or U**H *y */ + + if (upper) { + *(unsigned char *)trans = 'C'; + } else { + *(unsigned char *)trans = 'N'; + } + + ctrmm_("Left", uplo, trans, "Non-unit", n, n, &c_b1, &b[b_offset], + ldb, &a[a_offset], lda); + } + } + + work[1].r = (real) lopt, work[1].i = 0.f; + rwork[1] = (real) lropt; + iwork[1] = liopt; + + return 0; + +/* End of CHEGVD */ + +} /* chegvd_ */ + diff --git a/lapack-netlib/SRC/chegvx.c b/lapack-netlib/SRC/chegvx.c new file mode 100644 index 000000000..518b62bb5 --- /dev/null +++ b/lapack-netlib/SRC/chegvx.c @@ -0,0 +1,894 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHEGVX */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHEGVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHEGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, */ +/* VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, */ +/* LWORK, RWORK, IWORK, IFAIL, INFO ) */ + +/* CHARACTER JOBZ, RANGE, UPLO */ +/* INTEGER IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N */ +/* REAL ABSTOL, VL, VU */ +/* INTEGER IFAIL( * ), IWORK( * ) */ +/* REAL RWORK( * ), W( * ) */ +/* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ), */ +/* $ Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHEGVX computes selected eigenvalues, and optionally, eigenvectors */ +/* > of a complex generalized Hermitian-definite eigenproblem, of the form */ +/* > A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and */ +/* > B are assumed to be Hermitian and B is also positive definite. */ +/* > Eigenvalues and eigenvectors can be selected by specifying either a */ +/* > range of values or a range of indices for the desired eigenvalues. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITYPE */ +/* > \verbatim */ +/* > ITYPE is INTEGER */ +/* > Specifies the problem type to be solved: */ +/* > = 1: A*x = (lambda)*B*x */ +/* > = 2: A*B*x = (lambda)*x */ +/* > = 3: B*A*x = (lambda)*x */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all eigenvalues will be found. */ +/* > = 'V': all eigenvalues in the half-open interval (VL,VU] */ +/* > will be found. */ +/* > = 'I': the IL-th through IU-th eigenvalues will be found. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangles of A and B are stored; */ +/* > = 'L': Lower triangles of A and B are stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > */ +/* > On exit, the lower triangle (if UPLO='L') or the upper */ +/* > triangle (if UPLO='U') of A, including the diagonal, is */ +/* > destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB, N) */ +/* > On entry, the Hermitian matrix B. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of B contains the */ +/* > upper triangular part of the matrix B. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of B contains */ +/* > the lower triangular part of the matrix B. */ +/* > */ +/* > On exit, if INFO <= N, the part of B containing the matrix is */ +/* > overwritten by the triangular factor U or L from the Cholesky */ +/* > factorization B = U**H*U or B = L*L**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is REAL */ +/* > */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is REAL */ +/* > */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for eigenvalues. VL < VU. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > */ +/* > If RANGE='I', the index of the */ +/* > smallest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > */ +/* > If RANGE='I', the index of the */ +/* > largest eigenvalue to be returned. */ +/* > 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ABSTOL */ +/* > \verbatim */ +/* > ABSTOL is REAL */ +/* > The absolute error tolerance for the eigenvalues. */ +/* > An approximate eigenvalue is accepted as converged */ +/* > when it is determined to lie in an interval [a,b] */ +/* > of width less than or equal to */ +/* > */ +/* > ABSTOL + EPS * f2cmax( |a|,|b| ) , */ +/* > */ +/* > where EPS is the machine precision. If ABSTOL is less than */ +/* > or equal to zero, then EPS*|T| will be used in its place, */ +/* > where |T| is the 1-norm of the tridiagonal matrix obtained */ +/* > by reducing C to tridiagonal form, where C is the symmetric */ +/* > matrix of the standard symmetric problem to which the */ +/* > generalized problem is transformed. */ +/* > */ +/* > Eigenvalues will be computed most accurately when ABSTOL is */ +/* > set to twice the underflow threshold 2*SLAMCH('S'), not zero. */ +/* > If this routine returns with INFO>0, indicating that some */ +/* > eigenvectors did not converge, try setting ABSTOL to */ +/* > 2*SLAMCH('S'). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The total number of eigenvalues found. 0 <= M <= N. */ +/* > If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is REAL array, dimension (N) */ +/* > The first M elements contain the selected */ +/* > eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX array, dimension (LDZ, f2cmax(1,M)) */ +/* > If JOBZ = 'N', then Z is not referenced. */ +/* > If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ +/* > contain the orthonormal eigenvectors of the matrix A */ +/* > corresponding to the selected eigenvalues, with the i-th */ +/* > column of Z holding the eigenvector associated with W(i). */ +/* > The eigenvectors are normalized as follows: */ +/* > if ITYPE = 1 or 2, Z**T*B*Z = I; */ +/* > if ITYPE = 3, Z**T*inv(B)*Z = I. */ +/* > */ +/* > If an eigenvector fails to converge, then that column of Z */ +/* > contains the latest approximation to the eigenvector, and the */ +/* > index of the eigenvector is returned in IFAIL. */ +/* > Note: the user must ensure that at least f2cmax(1,M) columns are */ +/* > supplied in the array Z; if RANGE = 'V', the exact value of M */ +/* > is not known in advance and an upper bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > JOBZ = 'V', LDZ >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= f2cmax(1,2*N). */ +/* > For optimal efficiency, LWORK >= (NB+1)*N, */ +/* > where NB is the blocksize for CHETRD returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (7*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (5*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IFAIL */ +/* > \verbatim */ +/* > IFAIL is INTEGER array, dimension (N) */ +/* > If JOBZ = 'V', then if INFO = 0, the first M elements of */ +/* > IFAIL are zero. If INFO > 0, then IFAIL contains the */ +/* > indices of the eigenvectors that failed to converge. */ +/* > If JOBZ = 'N', then IFAIL is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: CPOTRF or CHEEVX returned an error code: */ +/* > <= N: if INFO = i, CHEEVX failed to converge; */ +/* > i eigenvectors failed to converge. Their indices */ +/* > are stored in array IFAIL. */ +/* > > N: if INFO = N + i, for 1 <= i <= N, then the leading */ +/* > minor of order i of B is not positive definite. */ +/* > The factorization of B could not be completed and */ +/* > no eigenvalues or eigenvectors were computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup complexHEeigen */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ + +/* ===================================================================== */ +/* Subroutine */ int chegvx_(integer *itype, char *jobz, char *range, char * + uplo, integer *n, complex *a, integer *lda, complex *b, integer *ldb, + real *vl, real *vu, integer *il, integer *iu, real *abstol, integer * + m, real *w, complex *z__, integer *ldz, complex *work, integer *lwork, + real *rwork, integer *iwork, integer *ifail, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, z_dim1, z_offset, i__1, i__2; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + char trans[1]; + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + logical upper, wantz; + integer nb; + logical alleig, indeig, valeig; + extern /* Subroutine */ int chegst_(integer *, char *, integer *, complex + *, integer *, complex *, integer *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), cheevx_( + char *, char *, char *, integer *, complex *, integer *, real *, + real *, integer *, integer *, real *, integer *, real *, complex * + , integer *, complex *, integer *, real *, integer *, integer *, + integer *), cpotrf_(char *, integer *, + complex *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + --ifail; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + upper = lsame_(uplo, "U"); + alleig = lsame_(range, "A"); + valeig = lsame_(range, "V"); + indeig = lsame_(range, "I"); + lquery = *lwork == -1; + + *info = 0; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! (wantz || lsame_(jobz, "N"))) { + *info = -2; + } else if (! (alleig || valeig || indeig)) { + *info = -3; + } else if (! (upper || lsame_(uplo, "L"))) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else { + if (valeig) { + if (*n > 0 && *vu <= *vl) { + *info = -11; + } + } else if (indeig) { + if (*il < 1 || *il > f2cmax(1,*n)) { + *info = -12; + } else if (*iu < f2cmin(*n,*il) || *iu > *n) { + *info = -13; + } + } + } + if (*info == 0) { + if (*ldz < 1 || wantz && *ldz < *n) { + *info = -18; + } + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); +/* Computing MAX */ + i__1 = 1, i__2 = (nb + 1) * *n; + lwkopt = f2cmax(i__1,i__2); + work[1].r = (real) lwkopt, work[1].i = 0.f; + +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -20; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEGVX", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + *m = 0; + if (*n == 0) { + return 0; + } + +/* Form a Cholesky factorization of B. */ + + cpotrf_(uplo, n, &b[b_offset], ldb, info); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem and solve. */ + + chegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); + cheevx_(jobz, range, uplo, n, &a[a_offset], lda, vl, vu, il, iu, abstol, + m, &w[1], &z__[z_offset], ldz, &work[1], lwork, &rwork[1], &iwork[ + 1], &ifail[1], info); + + if (wantz) { + +/* Backtransform eigenvectors to the original problem. */ + + if (*info > 0) { + *m = *info - 1; + } + if (*itype == 1 || *itype == 2) { + +/* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ +/* backtransform eigenvectors: x = inv(L)**H*y or inv(U)*y */ + + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'C'; + } + + ctrsm_("Left", uplo, trans, "Non-unit", n, m, &c_b1, &b[b_offset], + ldb, &z__[z_offset], ldz); + + } else if (*itype == 3) { + +/* For B*A*x=(lambda)*x; */ +/* backtransform eigenvectors: x = L*y or U**H*y */ + + if (upper) { + *(unsigned char *)trans = 'C'; + } else { + *(unsigned char *)trans = 'N'; + } + + ctrmm_("Left", uplo, trans, "Non-unit", n, m, &c_b1, &b[b_offset], + ldb, &z__[z_offset], ldz); + } + } + +/* Set WORK(1) to optimal complex workspace size. */ + + work[1].r = (real) lwkopt, work[1].i = 0.f; + + return 0; + +/* End of CHEGVX */ + +} /* chegvx_ */ + diff --git a/lapack-netlib/SRC/cherfs.c b/lapack-netlib/SRC/cherfs.c new file mode 100644 index 000000000..66b0ebcd9 --- /dev/null +++ b/lapack-netlib/SRC/cherfs.c @@ -0,0 +1,924 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHERFS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHERFS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, */ +/* X, LDX, FERR, BERR, WORK, RWORK, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* REAL BERR( * ), FERR( * ), RWORK( * ) */ +/* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHERFS improves the computed solution to a system of linear */ +/* > equations when the coefficient matrix is Hermitian indefinite, and */ +/* > provides error bounds and backward error estimates for the solution. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > The Hermitian matrix A. If UPLO = 'U', the leading N-by-N */ +/* > upper triangular part of A contains the upper triangular part */ +/* > of the matrix A, and the strictly lower triangular part of A */ +/* > is not referenced. If UPLO = 'L', the leading N-by-N lower */ +/* > triangular part of A contains the lower triangular part of */ +/* > the matrix A, and the strictly upper triangular part of A is */ +/* > not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AF */ +/* > \verbatim */ +/* > AF is COMPLEX array, dimension (LDAF,N) */ +/* > The factored form of the matrix A. AF contains the block */ +/* > diagonal matrix D and the multipliers used to obtain the */ +/* > factor U or L from the factorization A = U*D*U**H or */ +/* > A = L*D*L**H as computed by CHETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D */ +/* > as determined by CHETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > The right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is COMPLEX array, dimension (LDX,NRHS) */ +/* > On entry, the solution matrix X, as computed by CHETRS. */ +/* > On exit, the improved solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is REAL array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is REAL array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > ITMAX is the maximum number of steps of iterative refinement. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexHEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int cherfs_(char *uplo, integer *n, integer *nrhs, complex * + a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex * + b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, + complex *work, real *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, i__1, i__2, i__3, i__4, i__5; + real r__1, r__2, r__3, r__4; + complex q__1; + + /* Local variables */ + integer kase; + real safe1, safe2; + integer i__, j, k; + real s; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int chemv_(char *, integer *, complex *, complex * + , integer *, complex *, integer *, complex *, complex *, integer * + ); + integer isave[3]; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *), caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *); + integer count; + logical upper; + extern /* Subroutine */ int clacn2_(integer *, complex *, complex *, real + *, integer *, integer *); + real xk; + extern real slamch_(char *); + integer nz; + real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), chetrs_( + char *, integer *, integer *, complex *, integer *, integer *, + complex *, integer *, integer *); + real lstres, eps; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --rwork; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -10; + } else if (*ldx < f2cmax(1,*n)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHERFS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.f; + berr[j] = 0.f; +/* L10: */ + } + return 0; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = *n + 1; + eps = slamch_("Epsilon"); + safmin = slamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.f; +L20: + +/* Loop until stopping criterion is satisfied. */ + +/* Compute residual R = B - A * X */ + + ccopy_(n, &b[j * b_dim1 + 1], &c__1, &work[1], &c__1); + q__1.r = -1.f, q__1.i = 0.f; + chemv_(uplo, n, &q__1, &a[a_offset], lda, &x[j * x_dim1 + 1], &c__1, & + c_b1, &work[1], &c__1); + +/* Compute componentwise relative backward error from formula */ + +/* f2cmax(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + rwork[i__] = (r__1 = b[i__3].r, abs(r__1)) + (r__2 = r_imag(&b[ + i__ + j * b_dim1]), abs(r__2)); +/* L30: */ + } + +/* Compute abs(A)*abs(X) + abs(B). */ + + if (upper) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.f; + i__3 = k + j * x_dim1; + xk = (r__1 = x[i__3].r, abs(r__1)) + (r__2 = r_imag(&x[k + j * + x_dim1]), abs(r__2)); + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + k * a_dim1; + rwork[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = + r_imag(&a[i__ + k * a_dim1]), abs(r__2))) * xk; + i__4 = i__ + k * a_dim1; + i__5 = i__ + j * x_dim1; + s += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[ + i__ + k * a_dim1]), abs(r__2))) * ((r__3 = x[i__5] + .r, abs(r__3)) + (r__4 = r_imag(&x[i__ + j * + x_dim1]), abs(r__4))); +/* L40: */ + } + i__3 = k + k * a_dim1; + rwork[k] = rwork[k] + (r__1 = a[i__3].r, abs(r__1)) * xk + s; +/* L50: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.f; + i__3 = k + j * x_dim1; + xk = (r__1 = x[i__3].r, abs(r__1)) + (r__2 = r_imag(&x[k + j * + x_dim1]), abs(r__2)); + i__3 = k + k * a_dim1; + rwork[k] += (r__1 = a[i__3].r, abs(r__1)) * xk; + i__3 = *n; + for (i__ = k + 1; i__ <= i__3; ++i__) { + i__4 = i__ + k * a_dim1; + rwork[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = + r_imag(&a[i__ + k * a_dim1]), abs(r__2))) * xk; + i__4 = i__ + k * a_dim1; + i__5 = i__ + j * x_dim1; + s += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[ + i__ + k * a_dim1]), abs(r__2))) * ((r__3 = x[i__5] + .r, abs(r__3)) + (r__4 = r_imag(&x[i__ + j * + x_dim1]), abs(r__4))); +/* L60: */ + } + rwork[k] += s; +/* L70: */ + } + } + s = 0.f; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (rwork[i__] > safe2) { +/* Computing MAX */ + i__3 = i__; + r__3 = s, r__4 = ((r__1 = work[i__3].r, abs(r__1)) + (r__2 = + r_imag(&work[i__]), abs(r__2))) / rwork[i__]; + s = f2cmax(r__3,r__4); + } else { +/* Computing MAX */ + i__3 = i__; + r__3 = s, r__4 = ((r__1 = work[i__3].r, abs(r__1)) + (r__2 = + r_imag(&work[i__]), abs(r__2)) + safe1) / (rwork[i__] + + safe1); + s = f2cmax(r__3,r__4); + } +/* L80: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried. */ + + if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) { + +/* Update solution and try again. */ + + chetrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[1], + n, info); + caxpy_(n, &c_b1, &work[1], &c__1, &x[j * x_dim1 + 1], &c__1); + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(A))* */ +/* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(A) is the inverse of A */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(A)*abs(X) + abs(B) is less than SAFE2. */ + +/* Use CLACN2 to estimate the infinity-norm of the matrix */ +/* inv(A) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (rwork[i__] > safe2) { + i__3 = i__; + rwork[i__] = (r__1 = work[i__3].r, abs(r__1)) + (r__2 = + r_imag(&work[i__]), abs(r__2)) + nz * eps * rwork[i__] + ; + } else { + i__3 = i__; + rwork[i__] = (r__1 = work[i__3].r, abs(r__1)) + (r__2 = + r_imag(&work[i__]), abs(r__2)) + nz * eps * rwork[i__] + + safe1; + } +/* L90: */ + } + + kase = 0; +L100: + clacn2_(n, &work[*n + 1], &work[1], &ferr[j], &kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(A**H). */ + + chetrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[ + 1], n, info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__; + q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] + * work[i__5].i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; +/* L110: */ + } + } else if (kase == 2) { + +/* Multiply by inv(A)*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__; + q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] + * work[i__5].i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; +/* L120: */ + } + chetrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[ + 1], n, info); + } + goto L100; + } + +/* Normalize error. */ + + lstres = 0.f; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + i__3 = i__ + j * x_dim1; + r__3 = lstres, r__4 = (r__1 = x[i__3].r, abs(r__1)) + (r__2 = + r_imag(&x[i__ + j * x_dim1]), abs(r__2)); + lstres = f2cmax(r__3,r__4); +/* L130: */ + } + if (lstres != 0.f) { + ferr[j] /= lstres; + } + +/* L140: */ + } + + return 0; + +/* End of CHERFS */ + +} /* cherfs_ */ + diff --git a/lapack-netlib/SRC/cherfsx.c b/lapack-netlib/SRC/cherfsx.c new file mode 100644 index 000000000..e8dddda2b --- /dev/null +++ b/lapack-netlib/SRC/cherfsx.c @@ -0,0 +1,381 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHESV computes the solution to system of linear equations A * X = B for HE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHESV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHESV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, */ +/* LWORK, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LDB, LWORK, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHESV computes the solution to a complex system of linear equations */ +/* > A * X = B, */ +/* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */ +/* > matrices. */ +/* > */ +/* > The diagonal pivoting method is used to factor A as */ +/* > A = U * D * U**H, if UPLO = 'U', or */ +/* > A = L * D * L**H, if UPLO = 'L', */ +/* > where U (or L) is a product of permutation and unit upper (lower) */ +/* > triangular matrices, and D is Hermitian and block diagonal with */ +/* > 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then */ +/* > used to solve the system of equations A * X = B. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if INFO = 0, the block diagonal matrix D and the */ +/* > multipliers used to obtain the factor U or L from the */ +/* > factorization A = U*D*U**H or A = L*D*L**H as computed by */ +/* > CHETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D, as */ +/* > determined by CHETRF. If IPIV(k) > 0, then rows and columns */ +/* > k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 */ +/* > diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, */ +/* > then rows and columns k-1 and -IPIV(k) were interchanged and */ +/* > D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and */ +/* > IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and */ +/* > -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 */ +/* > diagonal block. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of WORK. LWORK >= 1, and for best performance */ +/* > LWORK >= f2cmax(1,N*NB), where NB is the optimal blocksize for */ +/* > CHETRF. */ +/* > for LWORK < N, TRS will be done with Level BLAS 2 */ +/* > for LWORK >= N, TRS will be done with Level BLAS 3 */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, D(i,i) is exactly zero. The factorization */ +/* > has been completed, but the block diagonal matrix D is */ +/* > exactly singular, so the solution could not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexHEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int chesv_(char *uplo, integer *n, integer *nrhs, complex *a, + integer *lda, integer *ipiv, complex *b, integer *ldb, complex *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + integer nb; + extern /* Subroutine */ int chetrf_(char *, integer *, complex *, integer + *, integer *, complex *, integer *, integer *), xerbla_( + char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int chetrs_(char *, integer *, integer *, complex + *, integer *, integer *, complex *, integer *, integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int chetrs2_(char *, integer *, integer *, + complex *, integer *, integer *, complex *, integer *, complex *, + integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } else if (*lwork < 1 && ! lquery) { + *info = -10; + } + + if (*info == 0) { + if (*n == 0) { + lwkopt = 1; + } else { + nb = ilaenv_(&c__1, "CHETRF", uplo, n, &c_n1, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + lwkopt = *n * nb; + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHESV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Compute the factorization A = U*D*U**H or A = L*D*L**H. */ + + chetrf_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + if (*lwork < *n) { + +/* Solve with TRS ( Use Level BLAS 2) */ + + chetrs_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], + ldb, info); + + } else { + +/* Solve with TRS2 ( Use Level BLAS 3) */ + + chetrs2_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], + ldb, &work[1], info); + + } + + } + + work[1].r = (real) lwkopt, work[1].i = 0.f; + + return 0; + +/* End of CHESV */ + +} /* chesv_ */ + diff --git a/lapack-netlib/SRC/chesv_aa.c b/lapack-netlib/SRC/chesv_aa.c new file mode 100644 index 000000000..04fae8ec6 --- /dev/null +++ b/lapack-netlib/SRC/chesv_aa.c @@ -0,0 +1,652 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHESV_AA computes the solution to system of linear equations A * X = B for HE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHESV_AA + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHESV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, */ +/* LWORK, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LDB, LWORK, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHESV_AA computes the solution to a complex system of linear equations */ +/* > A * X = B, */ +/* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */ +/* > matrices. */ +/* > */ +/* > Aasen's algorithm is used to factor A as */ +/* > A = U**H * T * U, if UPLO = 'U', or */ +/* > A = L * T * L**H, if UPLO = 'L', */ +/* > where U (or L) is a product of permutation and unit upper (lower) */ +/* > triangular matrices, and T is Hermitian and tridiagonal. The factored form */ +/* > of A is then used to solve the system of equations A * X = B. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if INFO = 0, the tridiagonal matrix T and the */ +/* > multipliers used to obtain the factor U or L from the */ +/* > factorization A = U**H*T*U or A = L*T*L**H as computed by */ +/* > CHETRF_AA. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > On exit, it contains the details of the interchanges, i.e., */ +/* > the row and column k of A were interchanged with the */ +/* > row and column IPIV(k). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of WORK. LWORK >= MAX(1,2*N,3*N-2), and for best */ +/* > performance LWORK >= MAX(1,N*NB), where NB is the optimal */ +/* > blocksize for CHETRF. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, D(i,i) is exactly zero. The factorization */ +/* > has been completed, but the block diagonal matrix D is */ +/* > exactly singular, so the solution could not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complexHEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int chesv_aa_(char *uplo, integer *n, integer *nrhs, + complex *a, integer *lda, integer *ipiv, complex *b, integer *ldb, + complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + integer lwkopt_hetrf__, lwkopt_hetrs__; + extern /* Subroutine */ int chetrf_aa_(char *, integer *, complex *, + integer *, integer *, complex *, integer *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int chetrs_aa_(char *, integer *, integer *, + complex *, integer *, integer *, complex *, integer *, complex *, + integer *, integer *), xerbla_(char *, integer *, ftnlen); + integer lwkopt; + logical lquery; + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *n << 1, i__2 = *n * 3 - 2; + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -10; + } + } + + if (*info == 0) { + chetrf_aa_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], &c_n1, + info); + lwkopt_hetrf__ = (integer) work[1].r; + chetrs_aa_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], + ldb, &work[1], &c_n1, info); + lwkopt_hetrs__ = (integer) work[1].r; + lwkopt = f2cmax(lwkopt_hetrf__,lwkopt_hetrs__); + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHESV_AA ", &i__1, (ftnlen)9); + return 0; + } else if (lquery) { + return 0; + } + +/* Compute the factorization A = U**H*T*U or A = L*T*L**H. */ + + chetrf_aa_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + chetrs_aa_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], + ldb, &work[1], lwork, info); + + } + + work[1].r = (real) lwkopt, work[1].i = 0.f; + + return 0; + +/* End of CHESV_AA */ + +} /* chesv_aa__ */ + diff --git a/lapack-netlib/SRC/chesv_aa_2stage.c b/lapack-netlib/SRC/chesv_aa_2stage.c new file mode 100644 index 000000000..07a980444 --- /dev/null +++ b/lapack-netlib/SRC/chesv_aa_2stage.c @@ -0,0 +1,679 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHESV_AA_2STAGE computes the solution to system of linear equations A * X = B for HE matrices + */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHESV_AA_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHESV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, */ +/* IPIV, IPIV2, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER N, NRHS, LDA, LTB, LDB, LWORK, INFO */ +/* INTEGER IPIV( * ), IPIV2( * ) */ +/* COMPLEX A( LDA, * ), TB( * ), B( LDB, *), WORK( * ) */ + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHESV_AA_2STAGE computes the solution to a complex system of */ +/* > linear equations */ +/* > A * X = B, */ +/* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */ +/* > matrices. */ +/* > */ +/* > Aasen's 2-stage algorithm is used to factor A as */ +/* > A = U**H * T * U, if UPLO = 'U', or */ +/* > A = L * T * L**H, if UPLO = 'L', */ +/* > where U (or L) is a product of permutation and unit upper (lower) */ +/* > triangular matrices, and T is Hermitian and band. The matrix T is */ +/* > then LU-factored with partial pivoting. The factored form of A */ +/* > is then used to solve the system of equations A * X = B. */ +/* > */ +/* > This is the blocked version of the algorithm, calling Level 3 BLAS. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, L is stored below (or above) the subdiaonal blocks, */ +/* > when UPLO is 'L' (or 'U'). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TB */ +/* > \verbatim */ +/* > TB is COMPLEX array, dimension (LTB) */ +/* > On exit, details of the LU factorization of the band matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LTB */ +/* > \verbatim */ +/* > LTB is INTEGER */ +/* > The size of the array TB. LTB >= 4*N, internally */ +/* > used to select NB such that LTB >= (3*NB+1)*N. */ +/* > */ +/* > If LTB = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal size of LTB, */ +/* > returns this value as the first entry of TB, and */ +/* > no error message related to LTB is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > On exit, it contains the details of the interchanges, i.e., */ +/* > the row and column k of A were interchanged with the */ +/* > row and column IPIV(k). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV2 */ +/* > \verbatim */ +/* > IPIV2 is INTEGER array, dimension (N) */ +/* > On exit, it contains the details of the interchanges, i.e., */ +/* > the row and column k of T were interchanged with the */ +/* > row and column IPIV(k). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > On entry, the right hand side matrix B. */ +/* > On exit, the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX workspace of size LWORK */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The size of WORK. LWORK >= N, internally used to select NB */ +/* > such that LWORK >= N*NB. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal size of the WORK array, */ +/* > returns this value as the first entry of the WORK array, and */ +/* > no error message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, band LU factorization failed on i-th column */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complexSYcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int chesv_aa_2stage_(char *uplo, integer *n, integer *nrhs, + complex *a, integer *lda, complex *tb, integer *ltb, integer *ipiv, + integer *ipiv2, complex *b, integer *ldb, complex *work, integer * + lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int chetrs_aa_2stage_(char *, integer *, integer + *, complex *, integer *, complex *, integer *, integer *, integer + *, complex *, integer *, integer *); + extern logical lsame_(char *, char *); + logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer lwkopt; + logical tquery, wquery; + extern /* Subroutine */ int chetrf_aa_2stage_(char *, integer *, complex + *, integer *, complex *, integer *, integer *, integer *, complex + *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tb; + --ipiv; + --ipiv2; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + wquery = *lwork == -1; + tquery = *ltb == -1; + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ltb < *n << 2 && ! tquery) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -11; + } else if (*lwork < *n && ! wquery) { + *info = -13; + } + + if (*info == 0) { + chetrf_aa_2stage_(uplo, n, &a[a_offset], lda, &tb[1], &c_n1, &ipiv[1] + , &ipiv2[1], &work[1], &c_n1, info); + lwkopt = (integer) work[1].r; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHESV_AA_2STAGE", &i__1, (ftnlen)15); + return 0; + } else if (wquery || tquery) { + return 0; + } + + +/* Compute the factorization A = U**H*T*U or A = L*T*L**H. */ + + chetrf_aa_2stage_(uplo, n, &a[a_offset], lda, &tb[1], ltb, &ipiv[1], & + ipiv2[1], &work[1], lwork, info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + chetrs_aa_2stage_(uplo, n, nrhs, &a[a_offset], lda, &tb[1], ltb, & + ipiv[1], &ipiv2[1], &b[b_offset], ldb, info); + + } + + work[1].r = (real) lwkopt, work[1].i = 0.f; + + return 0; + +/* End of CHESV_AA_2STAGE */ + +} /* chesv_aa_2stage__ */ + diff --git a/lapack-netlib/SRC/chesv_rk.c b/lapack-netlib/SRC/chesv_rk.c new file mode 100644 index 000000000..299da06df --- /dev/null +++ b/lapack-netlib/SRC/chesv_rk.c @@ -0,0 +1,716 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHESV_RK computes the solution to system of linear equations A * X = B for SY matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHESV_RK + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHESV_RK( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, */ +/* WORK, LWORK, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LDB, LWORK, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* COMPLEX A( LDA, * ), B( LDB, * ), E( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > CHESV_RK computes the solution to a complex system of linear */ +/* > equations A * X = B, where A is an N-by-N Hermitian matrix */ +/* > and X and B are N-by-NRHS matrices. */ +/* > */ +/* > The bounded Bunch-Kaufman (rook) diagonal pivoting method is used */ +/* > to factor A as */ +/* > A = P*U*D*(U**H)*(P**T), if UPLO = 'U', or */ +/* > A = P*L*D*(L**H)*(P**T), if UPLO = 'L', */ +/* > where U (or L) is unit upper (or lower) triangular matrix, */ +/* > U**H (or L**H) is the conjugate of U (or L), P is a permutation */ +/* > matrix, P**T is the transpose of P, and D is Hermitian and block */ +/* > diagonal with 1-by-1 and 2-by-2 diagonal blocks. */ +/* > */ +/* > CHETRF_RK is called to compute the factorization of a complex */ +/* > Hermitian matrix. The factored form of A is then used to solve */ +/* > the system of equations A * X = B by calling BLAS3 routine CHETRS_3. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > Hermitian matrix A is stored: */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. */ +/* > If UPLO = 'U': the leading N-by-N upper triangular part */ +/* > of A contains the upper triangular part of the matrix A, */ +/* > and the strictly lower triangular part of A is not */ +/* > referenced. */ +/* > */ +/* > If UPLO = 'L': the leading N-by-N lower triangular part */ +/* > of A contains the lower triangular part of the matrix A, */ +/* > and the strictly upper triangular part of A is not */ +/* > referenced. */ +/* > */ +/* > On exit, if INFO = 0, diagonal of the block diagonal */ +/* > matrix D and factors U or L as computed by CHETRF_RK: */ +/* > a) ONLY diagonal elements of the Hermitian block diagonal */ +/* > matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); */ +/* > (superdiagonal (or subdiagonal) elements of D */ +/* > are stored on exit in array E), and */ +/* > b) If UPLO = 'U': factor U in the superdiagonal part of A. */ +/* > If UPLO = 'L': factor L in the subdiagonal part of A. */ +/* > */ +/* > For more info see the description of CHETRF_RK routine. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is COMPLEX array, dimension (N) */ +/* > On exit, contains the output computed by the factorization */ +/* > routine CHETRF_RK, i.e. the superdiagonal (or subdiagonal) */ +/* > elements of the Hermitian block diagonal matrix D */ +/* > with 1-by-1 or 2-by-2 diagonal blocks, where */ +/* > If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; */ +/* > If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. */ +/* > */ +/* > NOTE: For 1-by-1 diagonal block D(k), where */ +/* > 1 <= k <= N, the element E(k) is set to 0 in both */ +/* > UPLO = 'U' or UPLO = 'L' cases. */ +/* > */ +/* > For more info see the description of CHETRF_RK routine. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D, */ +/* > as determined by CHETRF_RK. */ +/* > */ +/* > For more info see the description of CHETRF_RK routine. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension ( MAX(1,LWORK) ). */ +/* > Work array used in the factorization stage. */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of WORK. LWORK >= 1. For best performance */ +/* > of factorization stage LWORK >= f2cmax(1,N*NB), where NB is */ +/* > the optimal blocksize for CHETRF_RK. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; */ +/* > the routine only calculates the optimal size of the WORK */ +/* > array for factorization stage, returns this value as */ +/* > the first entry of the WORK array, and no error message */ +/* > related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > */ +/* > < 0: If INFO = -k, the k-th argument had an illegal value */ +/* > */ +/* > > 0: If INFO = k, the matrix A is singular, because: */ +/* > If UPLO = 'U': column k in the upper */ +/* > triangular part of A contains all zeros. */ +/* > If UPLO = 'L': column k in the lower */ +/* > triangular part of A contains all zeros. */ +/* > */ +/* > Therefore D(k,k) is exactly zero, and superdiagonal */ +/* > elements of column k of U (or subdiagonal elements of */ +/* > column k of L ) are all zeros. The factorization has */ +/* > been completed, but the block diagonal matrix D is */ +/* > exactly singular, and division by zero will occur if */ +/* > it is used to solve a system of equations. */ +/* > */ +/* > NOTE: INFO only stores the first occurrence of */ +/* > a singularity, any subsequent occurrence of singularity */ +/* > is not stored in INFO even though the factorization */ +/* > always completes. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexHEsolve */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > December 2016, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > */ +/* > September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */ +/* > School of Mathematics, */ +/* > University of Manchester */ +/* > */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int chesv_rk_(char *uplo, integer *n, integer *nrhs, + complex *a, integer *lda, complex *e, integer *ipiv, complex *b, + integer *ldb, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int chetrf_rk_(char *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, integer *), xerbla_(char *, integer *, ftnlen); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int chetrs_3_(char *, integer *, integer *, + complex *, integer *, complex *, integer *, complex *, integer *, + integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --e; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*lwork < 1 && ! lquery) { + *info = -11; + } + + if (*info == 0) { + if (*n == 0) { + lwkopt = 1; + } else { + chetrf_rk_(uplo, n, &a[a_offset], lda, &e[1], &ipiv[1], &work[1], + &c_n1, info); + lwkopt = work[1].r; + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHESV_RK ", &i__1,(ftnlen)9); + return 0; + } else if (lquery) { + return 0; + } + +/* Compute the factorization A = U*D*U**T or A = L*D*L**T. */ + + chetrf_rk_(uplo, n, &a[a_offset], lda, &e[1], &ipiv[1], &work[1], lwork, + info); + + if (*info == 0) { + +/* Solve the system A*X = B with BLAS3 solver, overwriting B with X. */ + + chetrs_3_(uplo, n, nrhs, &a[a_offset], lda, &e[1], &ipiv[1], &b[ + b_offset], ldb, info); + + } + + work[1].r = (real) lwkopt, work[1].i = 0.f; + + return 0; + +/* End of CHESV_RK */ + +} /* chesv_rk__ */ + diff --git a/lapack-netlib/SRC/chesv_rook.c b/lapack-netlib/SRC/chesv_rook.c new file mode 100644 index 000000000..206e0be2f --- /dev/null +++ b/lapack-netlib/SRC/chesv_rook.c @@ -0,0 +1,697 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHESV_ROOK computes the solution to a system of linear equations A * X = B for HE matrices usin +g the bounded Bunch-Kaufman ("rook") diagonal pivoting method */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHESV_ROOK + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHESV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, */ +/* LWORK, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LDB, LWORK, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHESV_ROOK computes the solution to a complex system of linear equations */ +/* > A * X = B, */ +/* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */ +/* > matrices. */ +/* > */ +/* > The bounded Bunch-Kaufman ("rook") diagonal pivoting method is used */ +/* > to factor A as */ +/* > A = U * D * U**T, if UPLO = 'U', or */ +/* > A = L * D * L**T, if UPLO = 'L', */ +/* > where U (or L) is a product of permutation and unit upper (lower) */ +/* > triangular matrices, and D is Hermitian and block diagonal with */ +/* > 1-by-1 and 2-by-2 diagonal blocks. */ +/* > */ +/* > CHETRF_ROOK is called to compute the factorization of a complex */ +/* > Hermition matrix A using the bounded Bunch-Kaufman ("rook") diagonal */ +/* > pivoting method. */ +/* > */ +/* > The factored form of A is then used to solve the system */ +/* > of equations A * X = B by calling CHETRS_ROOK (uses BLAS 2). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if INFO = 0, the block diagonal matrix D and the */ +/* > multipliers used to obtain the factor U or L from the */ +/* > factorization A = U*D*U**H or A = L*D*L**H as computed by */ +/* > CHETRF_ROOK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D. */ +/* > */ +/* > If UPLO = 'U': */ +/* > Only the last KB elements of IPIV are set. */ +/* > */ +/* > If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ +/* > interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* > */ +/* > If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and */ +/* > columns k and -IPIV(k) were interchanged and rows and */ +/* > columns k-1 and -IPIV(k-1) were inerchaged, */ +/* > D(k-1:k,k-1:k) is a 2-by-2 diagonal block. */ +/* > */ +/* > If UPLO = 'L': */ +/* > Only the first KB elements of IPIV are set. */ +/* > */ +/* > If IPIV(k) > 0, then rows and columns k and IPIV(k) */ +/* > were interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* > */ +/* > If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and */ +/* > columns k and -IPIV(k) were interchanged and rows and */ +/* > columns k+1 and -IPIV(k+1) were inerchaged, */ +/* > D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of WORK. LWORK >= 1, and for best performance */ +/* > LWORK >= f2cmax(1,N*NB), where NB is the optimal blocksize for */ +/* > CHETRF_ROOK. */ +/* > for LWORK < N, TRS will be done with Level BLAS 2 */ +/* > for LWORK >= N, TRS will be done with Level BLAS 3 */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, D(i,i) is exactly zero. The factorization */ +/* > has been completed, but the block diagonal matrix D is */ +/* > exactly singular, so the solution could not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2013 */ + +/* > \ingroup complexHEsolve */ +/* > */ +/* > \verbatim */ +/* > */ +/* > November 2013, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > */ +/* > September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */ +/* > School of Mathematics, */ +/* > University of Manchester */ +/* > */ +/* > \endverbatim */ + + +/* ===================================================================== */ +/* Subroutine */ int chesv_rook_(char *uplo, integer *n, integer *nrhs, + complex *a, integer *lda, integer *ipiv, complex *b, integer *ldb, + complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + integer nb; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int chetrf_rook_(char *, integer *, complex *, + integer *, integer *, complex *, integer *, integer *), + chetrs_rook_(char *, integer *, integer *, complex *, integer *, + integer *, complex *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.5.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2013 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } else if (*lwork < 1 && ! lquery) { + *info = -10; + } + + if (*info == 0) { + if (*n == 0) { + lwkopt = 1; + } else { + nb = ilaenv_(&c__1, "CHETRF_ROOK", uplo, n, &c_n1, &c_n1, &c_n1, ( + ftnlen)11, (ftnlen)1); + lwkopt = *n * nb; + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHESV_ROOK ", &i__1, (ftnlen)11); + return 0; + } else if (lquery) { + return 0; + } + +/* Compute the factorization A = U*D*U**H or A = L*D*L**H. */ + + chetrf_rook_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + +/* Solve with TRS ( Use Level BLAS 2) */ + + chetrs_rook_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset] + , ldb, info); + + } + + work[1].r = (real) lwkopt, work[1].i = 0.f; + + return 0; + +/* End of CHESV_ROOK */ + +} /* chesv_rook__ */ + diff --git a/lapack-netlib/SRC/chesvx.c b/lapack-netlib/SRC/chesvx.c new file mode 100644 index 000000000..1830af7d0 --- /dev/null +++ b/lapack-netlib/SRC/chesvx.c @@ -0,0 +1,844 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHESVX computes the solution to system of linear equations A * X = B for HE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHESVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHESVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, */ +/* LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, */ +/* RWORK, INFO ) */ + +/* CHARACTER FACT, UPLO */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS */ +/* REAL RCOND */ +/* INTEGER IPIV( * ) */ +/* REAL BERR( * ), FERR( * ), RWORK( * ) */ +/* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHESVX uses the diagonal pivoting factorization to compute the */ +/* > solution to a complex system of linear equations A * X = B, */ +/* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */ +/* > matrices. */ +/* > */ +/* > Error bounds on the solution and a condition estimate are also */ +/* > provided. */ +/* > \endverbatim */ + +/* > \par Description: */ +/* ================= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The following steps are performed: */ +/* > */ +/* > 1. If FACT = 'N', the diagonal pivoting method is used to factor A. */ +/* > The form of the factorization is */ +/* > A = U * D * U**H, if UPLO = 'U', or */ +/* > A = L * D * L**H, if UPLO = 'L', */ +/* > where U (or L) is a product of permutation and unit upper (lower) */ +/* > triangular matrices, and D is Hermitian and block diagonal with */ +/* > 1-by-1 and 2-by-2 diagonal blocks. */ +/* > */ +/* > 2. If some D(i,i)=0, so that D is exactly singular, then the routine */ +/* > returns with INFO = i. Otherwise, the factored form of A is used */ +/* > to estimate the condition number of the matrix A. If the */ +/* > reciprocal of the condition number is less than machine precision, */ +/* > INFO = N+1 is returned as a warning, but the routine still goes on */ +/* > to solve for X and compute error bounds as described below. */ +/* > */ +/* > 3. The system of equations is solved for X using the factored form */ +/* > of A. */ +/* > */ +/* > 4. Iterative refinement is applied to improve the computed solution */ +/* > matrix and calculate error bounds and backward error estimates */ +/* > for it. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] FACT */ +/* > \verbatim */ +/* > FACT is CHARACTER*1 */ +/* > Specifies whether or not the factored form of A has been */ +/* > supplied on entry. */ +/* > = 'F': On entry, AF and IPIV contain the factored form */ +/* > of A. A, AF and IPIV will not be modified. */ +/* > = 'N': The matrix A will be copied to AF and factored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > The Hermitian matrix A. If UPLO = 'U', the leading N-by-N */ +/* > upper triangular part of A contains the upper triangular part */ +/* > of the matrix A, and the strictly lower triangular part of A */ +/* > is not referenced. If UPLO = 'L', the leading N-by-N lower */ +/* > triangular part of A contains the lower triangular part of */ +/* > the matrix A, and the strictly upper triangular part of A is */ +/* > not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AF */ +/* > \verbatim */ +/* > AF is COMPLEX array, dimension (LDAF,N) */ +/* > If FACT = 'F', then AF is an input argument and on entry */ +/* > contains the block diagonal matrix D and the multipliers used */ +/* > to obtain the factor U or L from the factorization */ +/* > A = U*D*U**H or A = L*D*L**H as computed by CHETRF. */ +/* > */ +/* > If FACT = 'N', then AF is an output argument and on exit */ +/* > returns the block diagonal matrix D and the multipliers used */ +/* > to obtain the factor U or L from the factorization */ +/* > A = U*D*U**H or A = L*D*L**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > If FACT = 'F', then IPIV is an input argument and on entry */ +/* > contains details of the interchanges and the block structure */ +/* > of D, as determined by CHETRF. */ +/* > If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ +/* > interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* > If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */ +/* > columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */ +/* > is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = */ +/* > IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */ +/* > interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ +/* > */ +/* > If FACT = 'N', then IPIV is an output argument and on exit */ +/* > contains details of the interchanges and the block structure */ +/* > of D, as determined by CHETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > The N-by-NRHS right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is COMPLEX array, dimension (LDX,NRHS) */ +/* > If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > The estimate of the reciprocal condition number of the matrix */ +/* > A. If RCOND is less than the machine precision (in */ +/* > particular, if RCOND = 0), the matrix is singular to working */ +/* > precision. This condition is indicated by a return code of */ +/* > INFO > 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is REAL array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is REAL array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of WORK. LWORK >= f2cmax(1,2*N), and for best */ +/* > performance, when FACT = 'N', LWORK >= f2cmax(1,2*N,N*NB), where */ +/* > NB is the optimal blocksize for CHETRF. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is */ +/* > <= N: D(i,i) is exactly zero. The factorization */ +/* > has been completed but the factor D is exactly */ +/* > singular, so the solution and error bounds could */ +/* > not be computed. RCOND = 0 is returned. */ +/* > = N+1: D is nonsingular, but RCOND is less than machine */ +/* > precision, meaning that the matrix is singular */ +/* > to working precision. Nevertheless, the */ +/* > solution and error bounds are computed because */ +/* > there are a number of situations where the */ +/* > computed solution can be more accurate than the */ +/* > value of RCOND would suggest. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup complexHEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int chesvx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond, + real *ferr, real *berr, complex *work, integer *lwork, real *rwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, i__1, i__2; + + /* Local variables */ + extern logical lsame_(char *, char *); + real anorm; + integer nb; + extern real clanhe_(char *, char *, integer *, complex *, integer *, real + *); + extern /* Subroutine */ int checon_(char *, integer *, complex *, integer + *, integer *, real *, real *, complex *, integer *); + extern real slamch_(char *); + logical nofact; + extern /* Subroutine */ int cherfs_(char *, integer *, integer *, complex + *, integer *, complex *, integer *, integer *, complex *, integer + *, complex *, integer *, real *, real *, complex *, real *, + integer *), chetrf_(char *, integer *, complex *, integer + *, integer *, complex *, integer *, integer *), clacpy_( + char *, integer *, integer *, complex *, integer *, complex *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), chetrs_( + char *, integer *, integer *, complex *, integer *, integer *, + complex *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --rwork; + + /* Function Body */ + *info = 0; + nofact = lsame_(fact, "N"); + lquery = *lwork == -1; + if (! nofact && ! lsame_(fact, "F")) { + *info = -1; + } else if (! lsame_(uplo, "U") && ! lsame_(uplo, + "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -8; + } else if (*ldb < f2cmax(1,*n)) { + *info = -11; + } else if (*ldx < f2cmax(1,*n)) { + *info = -13; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -18; + } + } + + if (*info == 0) { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + lwkopt = f2cmax(i__1,i__2); + if (nofact) { + nb = ilaenv_(&c__1, "CHETRF", uplo, n, &c_n1, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n * nb; + lwkopt = f2cmax(i__1,i__2); + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHESVX", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + + if (nofact) { + +/* Compute the factorization A = U*D*U**H or A = L*D*L**H. */ + + clacpy_(uplo, n, n, &a[a_offset], lda, &af[af_offset], ldaf); + chetrf_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &work[1], lwork, + info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + *rcond = 0.f; + return 0; + } + } + +/* Compute the norm of the matrix A. */ + + anorm = clanhe_("I", uplo, n, &a[a_offset], lda, &rwork[1]); + +/* Compute the reciprocal of the condition number of A. */ + + checon_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &anorm, rcond, &work[1], + info); + +/* Compute the solution vectors X. */ + + clacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + chetrs_(uplo, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, + info); + +/* Use iterative refinement to improve the computed solutions and */ +/* compute error bounds and backward error estimates for them. */ + + cherfs_(uplo, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &ipiv[1], + &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[1] + , &rwork[1], info); + +/* Set INFO = N+1 if the matrix is singular to working precision. */ + + if (*rcond < slamch_("Epsilon")) { + *info = *n + 1; + } + + work[1].r = (real) lwkopt, work[1].i = 0.f; + + return 0; + +/* End of CHESVX */ + +} /* chesvx_ */ + diff --git a/lapack-netlib/SRC/chesvxx.c b/lapack-netlib/SRC/chesvxx.c new file mode 100644 index 000000000..bfdf1e21e --- /dev/null +++ b/lapack-netlib/SRC/chesvxx.c @@ -0,0 +1,1125 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief CHESVXX computes the solution to system of linear equations A * X = B for HE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHESVXX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHESVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, */ +/* EQUED, S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, */ +/* N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, */ +/* NPARAMS, PARAMS, WORK, RWORK, INFO ) */ + +/* CHARACTER EQUED, FACT, UPLO */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, */ +/* $ N_ERR_BNDS */ +/* REAL RCOND, RPVGRW */ +/* INTEGER IPIV( * ) */ +/* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ WORK( * ), X( LDX, * ) */ +/* REAL S( * ), PARAMS( * ), BERR( * ), RWORK( * ), */ +/* $ ERR_BNDS_NORM( NRHS, * ), */ +/* $ ERR_BNDS_COMP( NRHS, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHESVXX uses the diagonal pivoting factorization to compute the */ +/* > solution to a complex system of linear equations A * X = B, where */ +/* > A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */ +/* > matrices. */ +/* > */ +/* > If requested, both normwise and maximum componentwise error bounds */ +/* > are returned. CHESVXX will return a solution with a tiny */ +/* > guaranteed error (O(eps) where eps is the working machine */ +/* > precision) unless the matrix is very ill-conditioned, in which */ +/* > case a warning is returned. Relevant condition numbers also are */ +/* > calculated and returned. */ +/* > */ +/* > CHESVXX accepts user-provided factorizations and equilibration */ +/* > factors; see the definitions of the FACT and EQUED options. */ +/* > Solving with refinement and using a factorization from a previous */ +/* > CHESVXX call will also produce a solution with either O(eps) */ +/* > errors or warnings, but we cannot make that claim for general */ +/* > user-provided factorizations and equilibration factors if they */ +/* > differ from what CHESVXX would itself produce. */ +/* > \endverbatim */ + +/* > \par Description: */ +/* ================= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The following steps are performed: */ +/* > */ +/* > 1. If FACT = 'E', real scaling factors are computed to equilibrate */ +/* > the system: */ +/* > */ +/* > diag(S)*A*diag(S) *inv(diag(S))*X = diag(S)*B */ +/* > */ +/* > Whether or not the system will be equilibrated depends on the */ +/* > scaling of the matrix A, but if equilibration is used, A is */ +/* > overwritten by diag(S)*A*diag(S) and B by diag(S)*B. */ +/* > */ +/* > 2. If FACT = 'N' or 'E', the LU decomposition is used to factor */ +/* > the matrix A (after equilibration if FACT = 'E') as */ +/* > */ +/* > A = U * D * U**T, if UPLO = 'U', or */ +/* > A = L * D * L**T, if UPLO = 'L', */ +/* > */ +/* > where U (or L) is a product of permutation and unit upper (lower) */ +/* > triangular matrices, and D is Hermitian and block diagonal with */ +/* > 1-by-1 and 2-by-2 diagonal blocks. */ +/* > */ +/* > 3. If some D(i,i)=0, so that D is exactly singular, then the */ +/* > routine returns with INFO = i. Otherwise, the factored form of A */ +/* > is used to estimate the condition number of the matrix A (see */ +/* > argument RCOND). If the reciprocal of the condition number is */ +/* > less than machine precision, the routine still goes on to solve */ +/* > for X and compute error bounds as described below. */ +/* > */ +/* > 4. The system of equations is solved for X using the factored form */ +/* > of A. */ +/* > */ +/* > 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */ +/* > the routine will use iterative refinement to try to get a small */ +/* > error and error bounds. Refinement calculates the residual to at */ +/* > least twice the working precision. */ +/* > */ +/* > 6. If equilibration was used, the matrix X is premultiplied by */ +/* > diag(R) so that it solves the original system before */ +/* > equilibration. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \verbatim */ +/* > Some optional parameters are bundled in the PARAMS array. These */ +/* > settings determine how refinement is performed, but often the */ +/* > defaults are acceptable. If the defaults are acceptable, users */ +/* > can pass NPARAMS = 0 which prevents the source code from accessing */ +/* > the PARAMS argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] FACT */ +/* > \verbatim */ +/* > FACT is CHARACTER*1 */ +/* > Specifies whether or not the factored form of the matrix A is */ +/* > supplied on entry, and if not, whether the matrix A should be */ +/* > equilibrated before it is factored. */ +/* > = 'F': On entry, AF and IPIV contain the factored form of A. */ +/* > If EQUED is not 'N', the matrix A has been */ +/* > equilibrated with scaling factors given by S. */ +/* > A, AF, and IPIV are not modified. */ +/* > = 'N': The matrix A will be copied to AF and factored. */ +/* > = 'E': The matrix A will be equilibrated if necessary, then */ +/* > copied to AF and factored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > The Hermitian matrix A. If UPLO = 'U', the leading N-by-N */ +/* > upper triangular part of A contains the upper triangular */ +/* > part of the matrix A, and the strictly lower triangular */ +/* > part of A is not referenced. If UPLO = 'L', the leading */ +/* > N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by */ +/* > diag(S)*A*diag(S). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AF */ +/* > \verbatim */ +/* > AF is COMPLEX array, dimension (LDAF,N) */ +/* > If FACT = 'F', then AF is an input argument and on entry */ +/* > contains the block diagonal matrix D and the multipliers */ +/* > used to obtain the factor U or L from the factorization A = */ +/* > U*D*U**T or A = L*D*L**T as computed by SSYTRF. */ +/* > */ +/* > If FACT = 'N', then AF is an output argument and on exit */ +/* > returns the block diagonal matrix D and the multipliers */ +/* > used to obtain the factor U or L from the factorization A = */ +/* > U*D*U**T or A = L*D*L**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > If FACT = 'F', then IPIV is an input argument and on entry */ +/* > contains details of the interchanges and the block */ +/* > structure of D, as determined by CHETRF. If IPIV(k) > 0, */ +/* > then rows and columns k and IPIV(k) were interchanged and */ +/* > D(k,k) is a 1-by-1 diagonal block. If UPLO = 'U' and */ +/* > IPIV(k) = IPIV(k-1) < 0, then rows and columns k-1 and */ +/* > -IPIV(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 */ +/* > diagonal block. If UPLO = 'L' and IPIV(k) = IPIV(k+1) < 0, */ +/* > then rows and columns k+1 and -IPIV(k) were interchanged */ +/* > and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ +/* > */ +/* > If FACT = 'N', then IPIV is an output argument and on exit */ +/* > contains details of the interchanges and the block */ +/* > structure of D, as determined by CHETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] EQUED */ +/* > \verbatim */ +/* > EQUED is CHARACTER*1 */ +/* > Specifies the form of equilibration that was done. */ +/* > = 'N': No equilibration (always true if FACT = 'N'). */ +/* > = 'Y': Both row and column equilibration, i.e., A has been */ +/* > replaced by diag(S) * A * diag(S). */ +/* > EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* > output argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] S */ +/* > \verbatim */ +/* > S is REAL array, dimension (N) */ +/* > The scale factors for A. If EQUED = 'Y', A is multiplied on */ +/* > the left and right by diag(S). S is an input argument if FACT = */ +/* > 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED */ +/* > = 'Y', each element of S must be positive. If S is output, each */ +/* > element of S is a power of the radix. If S is input, each element */ +/* > of S should be a power of the radix to ensure a reliable solution */ +/* > and error estimates. Scaling by powers of the radix does not cause */ +/* > rounding errors unless the result underflows or overflows. */ +/* > Rounding errors during scaling lead to refining with a matrix that */ +/* > is not equivalent to the input matrix, producing error estimates */ +/* > that may not be reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, */ +/* > if EQUED = 'N', B is not modified; */ +/* > if EQUED = 'Y', B is overwritten by diag(S)*B; */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is COMPLEX array, dimension (LDX,NRHS) */ +/* > If INFO = 0, the N-by-NRHS solution matrix X to the original */ +/* > system of equations. Note that A and B are modified on exit if */ +/* > EQUED .ne. 'N', and the solution to the equilibrated system is */ +/* > inv(diag(S))*X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > Reciprocal scaled condition number. This is an estimate of the */ +/* > reciprocal Skeel condition number of the matrix A after */ +/* > equilibration (if done). If this is less than the machine */ +/* > precision (in particular, if it is zero), the matrix is singular */ +/* > to working precision. Note that the error may still be small even */ +/* > if this number is very small and the matrix appears ill- */ +/* > conditioned. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RPVGRW */ +/* > \verbatim */ +/* > RPVGRW is REAL */ +/* > Reciprocal pivot growth. On exit, this contains the reciprocal */ +/* > pivot growth factor norm(A)/norm(U). The "f2cmax absolute element" */ +/* > norm is used. If this is much less than 1, then the stability of */ +/* > the LU factorization of the (equilibrated) matrix A could be poor. */ +/* > This also means that the solution X, estimated condition numbers, */ +/* > and error bounds could be unreliable. If factorization fails with */ +/* > 0 for the leading INFO columns of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is REAL array, dimension (NRHS) */ +/* > Componentwise relative backward error. This is the */ +/* > componentwise relative backward error of each solution vector X(j) */ +/* > (i.e., the smallest relative change in any element of A or B that */ +/* > makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N_ERR_BNDS */ +/* > \verbatim */ +/* > N_ERR_BNDS is INTEGER */ +/* > Number of error bounds to return for each right hand side */ +/* > and each type (normwise or componentwise). See ERR_BNDS_NORM and */ +/* > ERR_BNDS_COMP below. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_NORM */ +/* > \verbatim */ +/* > ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > normwise relative error, which is defined as follows: */ +/* > */ +/* > Normwise relative error in the ith solution vector: */ +/* > max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* > ------------------------------ */ +/* > max_j abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the type of error information as described */ +/* > below. There currently are up to three pieces of information */ +/* > returned. */ +/* > */ +/* > The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated normwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * slamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*A, where S scales each row by a power of the */ +/* > radix so all absolute row sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_COMP */ +/* > \verbatim */ +/* > ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > componentwise relative error, which is defined as follows: */ +/* > */ +/* > Componentwise relative error in the ith solution vector: */ +/* > abs(XTRUE(j,i) - X(j,i)) */ +/* > max_j ---------------------- */ +/* > abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the right-hand side i (on which the */ +/* > componentwise relative error depends), and the type of error */ +/* > information as described below. There currently are up to three */ +/* > pieces of information returned for each right-hand side. If */ +/* > componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* > ERR_BNDS_COMP is not accessed. If N_ERR_BNDS < 3, then at most */ +/* > the first (:,N_ERR_BNDS) entries are returned. */ +/* > */ +/* > The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated componentwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * slamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*(A*diag(x)), where x is the solution for the */ +/* > current right-hand side and S scales each row of */ +/* > A*diag(x) by a power of the radix so all absolute row */ +/* > sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NPARAMS */ +/* > \verbatim */ +/* > NPARAMS is INTEGER */ +/* > Specifies the number of parameters set in PARAMS. If <= 0, the */ +/* > PARAMS array is never referenced and default values are used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] PARAMS */ +/* > \verbatim */ +/* > PARAMS is REAL array, dimension NPARAMS */ +/* > Specifies algorithm parameters. If an entry is < 0.0, then */ +/* > that entry will be filled with default value used for that */ +/* > parameter. Only positions up to NPARAMS are accessed; defaults */ +/* > are used for higher-numbered parameters. */ +/* > */ +/* > PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ +/* > refinement or not. */ +/* > Default: 1.0 */ +/* > = 0.0: No refinement is performed, and no error bounds are */ +/* > computed. */ +/* > = 1.0: Use the double-precision refinement algorithm, */ +/* > possibly with doubled-single computations if the */ +/* > compilation environment does not support DOUBLE */ +/* > PRECISION. */ +/* > (other values are reserved for future use) */ +/* > */ +/* > PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ +/* > computations allowed for refinement. */ +/* > Default: 10 */ +/* > Aggressive: Set to 100 to permit convergence using approximate */ +/* > factorizations or factorizations other than LU. If */ +/* > the factorization uses a technique other than */ +/* > Gaussian elimination, the guarantees in */ +/* > err_bnds_norm and err_bnds_comp may no longer be */ +/* > trustworthy. */ +/* > */ +/* > PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ +/* > will attempt to find a solution with small componentwise */ +/* > relative error in the double-precision algorithm. Positive */ +/* > is true, 0.0 is false. */ +/* > Default: 1.0 (attempt componentwise convergence) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (5*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: Successful exit. The solution to every right-hand side is */ +/* > guaranteed. */ +/* > < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly singular, so */ +/* > the solution and error bounds could not be computed. RCOND = 0 */ +/* > is returned. */ +/* > = N+J: The solution corresponding to the Jth right-hand side is */ +/* > not guaranteed. The solutions corresponding to other right- */ +/* > hand sides K with K > J may not be guaranteed as well, but */ +/* > only the first such right-hand side is reported. If a small */ +/* > componentwise error is not requested (PARAMS(3) = 0.0) then */ +/* > the Jth right-hand side is the first with a normwise error */ +/* > bound that is not guaranteed (the smallest J such */ +/* > that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ +/* > the Jth right-hand side is the first with either a normwise or */ +/* > componentwise error bound that is not guaranteed (the smallest */ +/* > J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ +/* > ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ +/* > ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ +/* > about all of the right-hand sides check ERR_BNDS_NORM or */ +/* > ERR_BNDS_COMP. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup complexHEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int chesvxx_(char *fact, char *uplo, integer *n, integer * + nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer * + ipiv, char *equed, real *s, complex *b, integer *ldb, complex *x, + integer *ldx, real *rcond, real *rpvgrw, real *berr, integer * + n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * + nparams, real *params, complex *work, real *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1; + real r__1, r__2; + + /* Local variables */ + real amax, smin, smax; + extern real cla_herpvgrw_(char *, integer *, integer *, complex *, + integer *, complex *, integer *, integer *, real *); + integer j; + extern logical lsame_(char *, char *); + real scond; + logical equil, rcequ; + extern /* Subroutine */ int claqhe_(char *, integer *, complex *, integer + *, real *, real *, real *, char *); + extern real slamch_(char *); + logical nofact; + extern /* Subroutine */ int chetrf_(char *, integer *, complex *, integer + *, integer *, complex *, integer *, integer *), clacpy_( + char *, integer *, integer *, complex *, integer *, complex *, + integer *), xerbla_(char *, integer *, ftnlen); + real bignum; + integer infequ; + extern /* Subroutine */ int chetrs_(char *, integer *, integer *, complex + *, integer *, integer *, complex *, integer *, integer *); + real smlnum; + extern /* Subroutine */ int clascl2_(integer *, integer *, real *, + complex *, integer *), cheequb_(char *, integer *, complex *, + integer *, real *, real *, real *, complex *, integer *), + cherfsx_(char *, char *, integer *, integer *, complex *, integer + *, complex *, integer *, integer *, real *, complex *, integer *, + complex *, integer *, real *, real *, integer *, real *, real *, + integer *, real *, complex *, real *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ================================================================== */ + + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1; + err_bnds_norm__ -= err_bnds_norm_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + --s; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --berr; + --params; + --work; + --rwork; + + /* Function Body */ + *info = 0; + nofact = lsame_(fact, "N"); + equil = lsame_(fact, "E"); + smlnum = slamch_("Safe minimum"); + bignum = 1.f / smlnum; + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rcequ = FALSE_; + } else { + rcequ = lsame_(equed, "Y"); + } + +/* Default is failure. If an input parameter is wrong or */ +/* factorization fails, make everything look horrible. Only the */ +/* pivot growth is set here, the rest is initialized in CHERFSX. */ + + *rpvgrw = 0.f; + +/* Test the input parameters. PARAMS is not tested until CHERFSX. */ + + if (! nofact && ! equil && ! lsame_(fact, "F")) { + *info = -1; + } else if (! lsame_(uplo, "U") && ! lsame_(uplo, + "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -8; + } else if (lsame_(fact, "F") && ! (rcequ || lsame_( + equed, "N"))) { + *info = -9; + } else { + if (rcequ) { + smin = bignum; + smax = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + r__1 = smin, r__2 = s[j]; + smin = f2cmin(r__1,r__2); +/* Computing MAX */ + r__1 = smax, r__2 = s[j]; + smax = f2cmax(r__1,r__2); +/* L10: */ + } + if (smin <= 0.f) { + *info = -10; + } else if (*n > 0) { + scond = f2cmax(smin,smlnum) / f2cmin(smax,bignum); + } else { + scond = 1.f; + } + } + if (*info == 0) { + if (*ldb < f2cmax(1,*n)) { + *info = -12; + } else if (*ldx < f2cmax(1,*n)) { + *info = -14; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHESVXX", &i__1, (ftnlen)7); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + cheequb_(uplo, n, &a[a_offset], lda, &s[1], &scond, &amax, &work[1], & + infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + claqhe_(uplo, n, &a[a_offset], lda, &s[1], &scond, &amax, equed); + rcequ = lsame_(equed, "Y"); + } + } + +/* Scale the right-hand side. */ + + if (rcequ) { + clascl2_(n, nrhs, &s[1], &b[b_offset], ldb); + } + + if (nofact || equil) { + +/* Compute the LDL^T or UDU^T factorization of A. */ + + clacpy_(uplo, n, n, &a[a_offset], lda, &af[af_offset], ldaf); + i__1 = f2cmax(1,*n) * 5; + chetrf_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &work[1], &i__1, + info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + +/* Pivot in column INFO is exactly 0 */ +/* Compute the reciprocal pivot growth factor of the */ +/* leading rank-deficient INFO columns of A. */ + + if (*n > 0) { + *rpvgrw = cla_herpvgrw_(uplo, n, info, &a[a_offset], lda, & + af[af_offset], ldaf, &ipiv[1], &rwork[1]); + } + return 0; + } + } + +/* Compute the reciprocal pivot growth factor RPVGRW. */ + + if (*n > 0) { + *rpvgrw = cla_herpvgrw_(uplo, n, info, &a[a_offset], lda, &af[ + af_offset], ldaf, &ipiv[1], &rwork[1]); + } + +/* Compute the solution matrix X. */ + + clacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + chetrs_(uplo, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, + info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + cherfsx_(uplo, equed, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, & + ipiv[1], &s[1], &b[b_offset], ldb, &x[x_offset], ldx, rcond, & + berr[1], n_err_bnds__, &err_bnds_norm__[err_bnds_norm_offset], & + err_bnds_comp__[err_bnds_comp_offset], nparams, ¶ms[1], &work[ + 1], &rwork[1], info); + +/* Scale solutions. */ + + if (rcequ) { + clascl2_(n, nrhs, &s[1], &x[x_offset], ldx); + } + + return 0; + +/* End of CHESVXX */ + +} /* chesvxx_ */ + diff --git a/lapack-netlib/SRC/cheswapr.c b/lapack-netlib/SRC/cheswapr.c new file mode 100644 index 000000000..25eeb2983 --- /dev/null +++ b/lapack-netlib/SRC/cheswapr.c @@ -0,0 +1,630 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHESWAPR applies an elementary permutation on the rows and columns of a Hermitian matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHESWAPR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHESWAPR( UPLO, N, A, LDA, I1, I2) */ + +/* CHARACTER UPLO */ +/* INTEGER I1, I2, LDA, N */ +/* COMPLEX A( LDA, N ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHESWAPR applies an elementary permutation on the rows and the columns of */ +/* > a hermitian matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the details of the factorization are stored */ +/* > as an upper or lower triangular matrix. */ +/* > = 'U': Upper triangular, form is A = U*D*U**T; */ +/* > = 'L': Lower triangular, form is A = L*D*L**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the NB diagonal matrix D and the multipliers */ +/* > used to obtain the factor U or L as computed by CSYTRF. */ +/* > */ +/* > On exit, if INFO = 0, the (symmetric) inverse of the original */ +/* > matrix. If UPLO = 'U', the upper triangular part of the */ +/* > inverse is formed and the part of A below the diagonal is not */ +/* > referenced; if UPLO = 'L' the lower triangular part of the */ +/* > inverse is formed and the part of A above the diagonal is */ +/* > not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] I1 */ +/* > \verbatim */ +/* > I1 is INTEGER */ +/* > Index of the first row to swap */ +/* > \endverbatim */ +/* > */ +/* > \param[in] I2 */ +/* > \verbatim */ +/* > I2 is INTEGER */ +/* > Index of the second row to swap */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexHEauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int cheswapr_(char *uplo, integer *n, complex *a, integer * + lda, integer *i1, integer *i2) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1; + + /* Local variables */ + integer i__; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + logical upper; + complex tmp; + + +/* -- LAPACK auxiliary routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + + /* Function Body */ + upper = lsame_(uplo, "U"); + if (upper) { + +/* UPPER */ +/* first swap */ +/* - swap column I1 and I2 from I1 to I1-1 */ + i__1 = *i1 - 1; + cswap_(&i__1, &a[*i1 * a_dim1 + 1], &c__1, &a[*i2 * a_dim1 + 1], & + c__1); + +/* second swap : */ +/* - swap A(I1,I1) and A(I2,I2) */ +/* - swap row I1 from I1+1 to I2-1 with col I2 from I1+1 to I2-1 */ +/* - swap A(I2,I1) and A(I1,I2) */ + i__1 = *i1 + *i1 * a_dim1; + tmp.r = a[i__1].r, tmp.i = a[i__1].i; + i__1 = *i1 + *i1 * a_dim1; + i__2 = *i2 + *i2 * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = *i2 + *i2 * a_dim1; + a[i__1].r = tmp.r, a[i__1].i = tmp.i; + + i__1 = *i2 - *i1 - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *i1 + (*i1 + i__) * a_dim1; + tmp.r = a[i__2].r, tmp.i = a[i__2].i; + i__2 = *i1 + (*i1 + i__) * a_dim1; + r_cnjg(&q__1, &a[*i1 + i__ + *i2 * a_dim1]); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = *i1 + i__ + *i2 * a_dim1; + r_cnjg(&q__1, &tmp); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + } + + i__1 = *i1 + *i2 * a_dim1; + r_cnjg(&q__1, &a[*i1 + *i2 * a_dim1]); + a[i__1].r = q__1.r, a[i__1].i = q__1.i; + +/* third swap */ +/* - swap row I1 and I2 from I2+1 to N */ + i__1 = *n; + for (i__ = *i2 + 1; i__ <= i__1; ++i__) { + i__2 = *i1 + i__ * a_dim1; + tmp.r = a[i__2].r, tmp.i = a[i__2].i; + i__2 = *i1 + i__ * a_dim1; + i__3 = *i2 + i__ * a_dim1; + a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; + i__2 = *i2 + i__ * a_dim1; + a[i__2].r = tmp.r, a[i__2].i = tmp.i; + } + + } else { + +/* LOWER */ +/* first swap */ +/* - swap row I1 and I2 from 1 to I1-1 */ + i__1 = *i1 - 1; + cswap_(&i__1, &a[*i1 + a_dim1], lda, &a[*i2 + a_dim1], lda); + +/* second swap : */ +/* - swap A(I1,I1) and A(I2,I2) */ +/* - swap col I1 from I1+1 to I2-1 with row I2 from I1+1 to I2-1 */ +/* - swap A(I2,I1) and A(I1,I2) */ + i__1 = *i1 + *i1 * a_dim1; + tmp.r = a[i__1].r, tmp.i = a[i__1].i; + i__1 = *i1 + *i1 * a_dim1; + i__2 = *i2 + *i2 * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = *i2 + *i2 * a_dim1; + a[i__1].r = tmp.r, a[i__1].i = tmp.i; + + i__1 = *i2 - *i1 - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *i1 + i__ + *i1 * a_dim1; + tmp.r = a[i__2].r, tmp.i = a[i__2].i; + i__2 = *i1 + i__ + *i1 * a_dim1; + r_cnjg(&q__1, &a[*i2 + (*i1 + i__) * a_dim1]); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = *i2 + (*i1 + i__) * a_dim1; + r_cnjg(&q__1, &tmp); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + } + + i__1 = *i2 + *i1 * a_dim1; + r_cnjg(&q__1, &a[*i2 + *i1 * a_dim1]); + a[i__1].r = q__1.r, a[i__1].i = q__1.i; + +/* third swap */ +/* - swap col I1 and I2 from I2+1 to N */ + i__1 = *n; + for (i__ = *i2 + 1; i__ <= i__1; ++i__) { + i__2 = i__ + *i1 * a_dim1; + tmp.r = a[i__2].r, tmp.i = a[i__2].i; + i__2 = i__ + *i1 * a_dim1; + i__3 = i__ + *i2 * a_dim1; + a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; + i__2 = i__ + *i2 * a_dim1; + a[i__2].r = tmp.r, a[i__2].i = tmp.i; + } + + } + return 0; +} /* cheswapr_ */ + diff --git a/lapack-netlib/SRC/chetd2.c b/lapack-netlib/SRC/chetd2.c new file mode 100644 index 000000000..005aa45e3 --- /dev/null +++ b/lapack-netlib/SRC/chetd2.c @@ -0,0 +1,793 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHETD2 reduces a Hermitian matrix to real symmetric tridiagonal form by an unitary similarity t +ransformation (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHETD2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHETD2( UPLO, N, A, LDA, D, E, TAU, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* REAL D( * ), E( * ) */ +/* COMPLEX A( LDA, * ), TAU( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHETD2 reduces a complex Hermitian matrix A to real symmetric */ +/* > tridiagonal form T by a unitary similarity transformation: */ +/* > Q**H * A * Q = T. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > Hermitian matrix A is stored: */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > n-by-n upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading n-by-n lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > On exit, if UPLO = 'U', the diagonal and first superdiagonal */ +/* > of A are overwritten by the corresponding elements of the */ +/* > tridiagonal matrix T, and the elements above the first */ +/* > superdiagonal, with the array TAU, represent the unitary */ +/* > matrix Q as a product of elementary reflectors; if UPLO */ +/* > = 'L', the diagonal and first subdiagonal of A are over- */ +/* > written by the corresponding elements of the tridiagonal */ +/* > matrix T, and the elements below the first subdiagonal, with */ +/* > the array TAU, represent the unitary matrix Q as a product */ +/* > of elementary reflectors. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (N) */ +/* > The diagonal elements of the tridiagonal matrix T: */ +/* > D(i) = A(i,i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is REAL array, dimension (N-1) */ +/* > The off-diagonal elements of the tridiagonal matrix T: */ +/* > E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX array, dimension (N-1) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexHEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > If UPLO = 'U', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(n-1) . . . H(2) H(1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */ +/* > A(1:i-1,i+1), and tau in TAU(i). */ +/* > */ +/* > If UPLO = 'L', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(n-1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */ +/* > and tau in TAU(i). */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples */ +/* > with n = 5: */ +/* > */ +/* > if UPLO = 'U': if UPLO = 'L': */ +/* > */ +/* > ( d e v2 v3 v4 ) ( d ) */ +/* > ( d e v3 v4 ) ( e d ) */ +/* > ( d e v4 ) ( v1 e d ) */ +/* > ( d e ) ( v1 v2 e d ) */ +/* > ( d ) ( v1 v2 v3 e d ) */ +/* > */ +/* > where d and e denote diagonal and off-diagonal elements of T, and vi */ +/* > denotes an element of the vector defining H(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int chetd2_(char *uplo, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tau, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + complex q__1, q__2, q__3, q__4; + + /* Local variables */ + complex taui; + extern /* Subroutine */ int cher2_(char *, integer *, complex *, complex * + , integer *, complex *, integer *, complex *, integer *); + integer i__; + complex alpha; + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int chemv_(char *, integer *, complex *, complex * + , integer *, complex *, integer *, complex *, complex *, integer * + ), caxpy_(integer *, complex *, complex *, integer *, + complex *, integer *); + logical upper; + extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, + integer *, complex *), xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --d__; + --e; + --tau; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHETD2", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + if (upper) { + +/* Reduce the upper triangle of A */ + + i__1 = *n + *n * a_dim1; + i__2 = *n + *n * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + for (i__ = *n - 1; i__ >= 1; --i__) { + +/* Generate elementary reflector H(i) = I - tau * v * v**H */ +/* to annihilate A(1:i-1,i+1) */ + + i__1 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__1].r, alpha.i = a[i__1].i; + clarfg_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui); + i__1 = i__; + e[i__1] = alpha.r; + + if (taui.r != 0.f || taui.i != 0.f) { + +/* Apply H(i) from both sides to A(1:i,1:i) */ + + i__1 = i__ + (i__ + 1) * a_dim1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + +/* Compute x := tau * A * v storing x in TAU(1:i) */ + + chemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b2, &tau[1], &c__1); + +/* Compute w := x - 1/2 * tau * (x**H * v) * v */ + + q__3.r = -.5f, q__3.i = 0.f; + q__2.r = q__3.r * taui.r - q__3.i * taui.i, q__2.i = q__3.r * + taui.i + q__3.i * taui.r; + cdotc_(&q__4, &i__, &tau[1], &c__1, &a[(i__ + 1) * a_dim1 + 1] + , &c__1); + q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * + q__4.i + q__2.i * q__4.r; + alpha.r = q__1.r, alpha.i = q__1.i; + caxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + 1], &c__1); + +/* Apply the transformation as a rank-2 update: */ +/* A := A - v * w**H - w * v**H */ + + q__1.r = -1.f, q__1.i = 0.f; + cher2_(uplo, &i__, &q__1, &a[(i__ + 1) * a_dim1 + 1], &c__1, & + tau[1], &c__1, &a[a_offset], lda); + + } else { + i__1 = i__ + i__ * a_dim1; + i__2 = i__ + i__ * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + } + i__1 = i__ + (i__ + 1) * a_dim1; + i__2 = i__; + a[i__1].r = e[i__2], a[i__1].i = 0.f; + i__1 = i__ + 1; + i__2 = i__ + 1 + (i__ + 1) * a_dim1; + d__[i__1] = a[i__2].r; + i__1 = i__; + tau[i__1].r = taui.r, tau[i__1].i = taui.i; +/* L10: */ + } + i__1 = a_dim1 + 1; + d__[1] = a[i__1].r; + } else { + +/* Reduce the lower triangle of A */ + + i__1 = a_dim1 + 1; + i__2 = a_dim1 + 1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) = I - tau * v * v**H */ +/* to annihilate A(i+2:n,i) */ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[f2cmin(i__3,*n) + i__ * a_dim1], &c__1, & + taui); + i__2 = i__; + e[i__2] = alpha.r; + + if (taui.r != 0.f || taui.i != 0.f) { + +/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ + + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute x := tau * A * v storing y in TAU(i:n-1) */ + + i__2 = *n - i__; + chemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b2, &tau[ + i__], &c__1); + +/* Compute w := x - 1/2 * tau * (x**H * v) * v */ + + q__3.r = -.5f, q__3.i = 0.f; + q__2.r = q__3.r * taui.r - q__3.i * taui.i, q__2.i = q__3.r * + taui.i + q__3.i * taui.r; + i__2 = *n - i__; + cdotc_(&q__4, &i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ * + a_dim1], &c__1); + q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * + q__4.i + q__2.i * q__4.r; + alpha.r = q__1.r, alpha.i = q__1.i; + i__2 = *n - i__; + caxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &c__1); + +/* Apply the transformation as a rank-2 update: */ +/* A := A - v * w**H - w * v**H */ + + i__2 = *n - i__; + q__1.r = -1.f, q__1.i = 0.f; + cher2_(uplo, &i__2, &q__1, &a[i__ + 1 + i__ * a_dim1], &c__1, + &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda); + + } else { + i__2 = i__ + 1 + (i__ + 1) * a_dim1; + i__3 = i__ + 1 + (i__ + 1) * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } + i__2 = i__ + 1 + i__ * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.f; + i__2 = i__; + i__3 = i__ + i__ * a_dim1; + d__[i__2] = a[i__3].r; + i__2 = i__; + tau[i__2].r = taui.r, tau[i__2].i = taui.i; +/* L20: */ + } + i__1 = *n; + i__2 = *n + *n * a_dim1; + d__[i__1] = a[i__2].r; + } + + return 0; + +/* End of CHETD2 */ + +} /* chetd2_ */ + diff --git a/lapack-netlib/SRC/chetf2.c b/lapack-netlib/SRC/chetf2.c new file mode 100644 index 000000000..92665d573 --- /dev/null +++ b/lapack-netlib/SRC/chetf2.c @@ -0,0 +1,1236 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHETF2 computes the factorization of a complex Hermitian matrix, using the diagonal pivoting me +thod (unblocked algorithm calling Level 2 BLAS). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHETF2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHETF2( UPLO, N, A, LDA, IPIV, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* INTEGER IPIV( * ) */ +/* COMPLEX A( LDA, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHETF2 computes the factorization of a complex Hermitian matrix A */ +/* > using the Bunch-Kaufman diagonal pivoting method: */ +/* > */ +/* > A = U*D*U**H or A = L*D*L**H */ +/* > */ +/* > where U (or L) is a product of permutation and unit upper (lower) */ +/* > triangular matrices, U**H is the conjugate transpose of U, and D is */ +/* > Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. */ +/* > */ +/* > This is the unblocked version of the algorithm, calling Level 2 BLAS. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > Hermitian matrix A is stored: */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > n-by-n upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading n-by-n lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, the block diagonal matrix D and the multipliers used */ +/* > to obtain the factor U or L (see below for further details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D. */ +/* > */ +/* > If UPLO = 'U': */ +/* > If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ +/* > interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* > */ +/* > If IPIV(k) = IPIV(k-1) < 0, then rows and columns */ +/* > k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */ +/* > is a 2-by-2 diagonal block. */ +/* > */ +/* > If UPLO = 'L': */ +/* > If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ +/* > interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* > */ +/* > If IPIV(k) = IPIV(k+1) < 0, then rows and columns */ +/* > k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) */ +/* > is a 2-by-2 diagonal block. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > > 0: if INFO = k, D(k,k) is exactly zero. The factorization */ +/* > has been completed, but the block diagonal matrix D is */ +/* > exactly singular, and division by zero will occur if it */ +/* > is used to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexHEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > 09-29-06 - patch from */ +/* > Bobby Cheng, MathWorks */ +/* > */ +/* > Replace l.210 and l.392 */ +/* > IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN */ +/* > by */ +/* > IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. SISNAN(ABSAKK) ) THEN */ +/* > */ +/* > 01-01-96 - Based on modifications by */ +/* > J. Lewis, Boeing Computer Services Company */ +/* > A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ +/* > */ +/* > If UPLO = 'U', then A = U*D*U**H, where */ +/* > U = P(n)*U(n)* ... *P(k)U(k)* ..., */ +/* > i.e., U is a product of terms P(k)*U(k), where k decreases from n to */ +/* > 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ +/* > and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ +/* > defined by IPIV(k), and U(k) is a unit upper triangular matrix, such */ +/* > that if the diagonal block D(k) is of order s (s = 1 or 2), then */ +/* > */ +/* > ( I v 0 ) k-s */ +/* > U(k) = ( 0 I 0 ) s */ +/* > ( 0 0 I ) n-k */ +/* > k-s s n-k */ +/* > */ +/* > If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). */ +/* > If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), */ +/* > and A(k,k), and v overwrites A(1:k-2,k-1:k). */ +/* > */ +/* > If UPLO = 'L', then A = L*D*L**H, where */ +/* > L = P(1)*L(1)* ... *P(k)*L(k)* ..., */ +/* > i.e., L is a product of terms P(k)*L(k), where k increases from 1 to */ +/* > n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ +/* > and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ +/* > defined by IPIV(k), and L(k) is a unit lower triangular matrix, such */ +/* > that if the diagonal block D(k) is of order s (s = 1 or 2), then */ +/* > */ +/* > ( I 0 0 ) k-1 */ +/* > L(k) = ( 0 I 0 ) s */ +/* > ( 0 v I ) n-k-s+1 */ +/* > k-1 s n-k-s+1 */ +/* > */ +/* > If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). */ +/* > If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), */ +/* > and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int chetf2_(char *uplo, integer *n, complex *a, integer *lda, + integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + real r__1, r__2, r__3, r__4; + complex q__1, q__2, q__3, q__4, q__5, q__6; + + /* Local variables */ + extern /* Subroutine */ int cher_(char *, integer *, real *, complex *, + integer *, complex *, integer *); + integer imax, jmax; + real d__; + integer i__, j, k; + complex t; + real alpha; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + integer kstep; + logical upper; + real r1, d11; + complex d12; + real d22; + complex d21; + extern real slapy2_(real *, real *); + integer kk, kp; + real absakk; + complex wk; + extern integer icamax_(integer *, complex *, integer *); + real tt; + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), xerbla_(char *, integer *, ftnlen); + real colmax; + extern logical sisnan_(real *); + real rowmax; + complex wkm1, wkp1; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHETF2", &i__1, (ftnlen)6); + return 0; + } + +/* Initialize ALPHA for use in choosing pivot block size. */ + + alpha = (sqrt(17.f) + 1.f) / 8.f; + + if (upper) { + +/* Factorize A as U*D*U**H using the upper triangle of A */ + +/* K is the main loop index, decreasing from N to 1 in steps of */ +/* 1 or 2 */ + + k = *n; +L10: + +/* If K < 1, exit from loop */ + + if (k < 1) { + goto L90; + } + kstep = 1; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + i__1 = k + k * a_dim1; + absakk = (r__1 = a[i__1].r, abs(r__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value. */ +/* Determine both COLMAX and IMAX. */ + + if (k > 1) { + i__1 = k - 1; + imax = icamax_(&i__1, &a[k * a_dim1 + 1], &c__1); + i__1 = imax + k * a_dim1; + colmax = (r__1 = a[i__1].r, abs(r__1)) + (r__2 = r_imag(&a[imax + + k * a_dim1]), abs(r__2)); + } else { + colmax = 0.f; + } + + if (f2cmax(absakk,colmax) == 0.f || sisnan_(&absakk)) { + +/* Column K is or underflow, or contains a NaN: */ +/* set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + } else { + if (absakk >= alpha * colmax) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else { + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value */ + + i__1 = k - imax; + jmax = imax + icamax_(&i__1, &a[imax + (imax + 1) * a_dim1], + lda); + i__1 = imax + jmax * a_dim1; + rowmax = (r__1 = a[i__1].r, abs(r__1)) + (r__2 = r_imag(&a[ + imax + jmax * a_dim1]), abs(r__2)); + if (imax > 1) { + i__1 = imax - 1; + jmax = icamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); +/* Computing MAX */ + i__1 = jmax + imax * a_dim1; + r__3 = rowmax, r__4 = (r__1 = a[i__1].r, abs(r__1)) + ( + r__2 = r_imag(&a[jmax + imax * a_dim1]), abs(r__2) + ); + rowmax = f2cmax(r__3,r__4); + } + + if (absakk >= alpha * colmax * (colmax / rowmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else /* if(complicated condition) */ { + i__1 = imax + imax * a_dim1; + if ((r__1 = a[i__1].r, abs(r__1)) >= alpha * rowmax) { + +/* interchange rows and columns K and IMAX, use 1-by-1 */ +/* pivot block */ + + kp = imax; + } else { + +/* interchange rows and columns K-1 and IMAX, use 2-by-2 */ +/* pivot block */ + + kp = imax; + kstep = 2; + } + } + } + + kk = k - kstep + 1; + if (kp != kk) { + +/* Interchange rows and columns KK and KP in the leading */ +/* submatrix A(1:k,1:k) */ + + i__1 = kp - 1; + cswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], + &c__1); + i__1 = kk - 1; + for (j = kp + 1; j <= i__1; ++j) { + r_cnjg(&q__1, &a[j + kk * a_dim1]); + t.r = q__1.r, t.i = q__1.i; + i__2 = j + kk * a_dim1; + r_cnjg(&q__1, &a[kp + j * a_dim1]); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L20: */ + } + i__1 = kp + kk * a_dim1; + r_cnjg(&q__1, &a[kp + kk * a_dim1]); + a[i__1].r = q__1.r, a[i__1].i = q__1.i; + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.f; + if (kstep == 2) { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + i__1 = k - 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k - 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } else { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + if (kstep == 2) { + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (k - 1) * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + } + } + +/* Update the leading submatrix */ + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k now holds */ + +/* W(k) = U(k)*D(k) */ + +/* where U(k) is the k-th column of U */ + +/* Perform a rank-1 update of A(1:k-1,1:k-1) as */ + +/* A := A - U(k)*D(k)*U(k)**H = A - W(k)*1/D(k)*W(k)**H */ + + i__1 = k + k * a_dim1; + r1 = 1.f / a[i__1].r; + i__1 = k - 1; + r__1 = -r1; + cher_(uplo, &i__1, &r__1, &a[k * a_dim1 + 1], &c__1, &a[ + a_offset], lda); + +/* Store U(k) in column k */ + + i__1 = k - 1; + csscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + +/* 2-by-2 pivot block D(k): columns k and k-1 now hold */ + +/* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) */ + +/* where U(k) and U(k-1) are the k-th and (k-1)-th columns */ +/* of U */ + +/* Perform a rank-2 update of A(1:k-2,1:k-2) as */ + +/* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**H */ +/* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )**H */ + + if (k > 2) { + + i__1 = k - 1 + k * a_dim1; + r__1 = a[i__1].r; + r__2 = r_imag(&a[k - 1 + k * a_dim1]); + d__ = slapy2_(&r__1, &r__2); + i__1 = k - 1 + (k - 1) * a_dim1; + d22 = a[i__1].r / d__; + i__1 = k + k * a_dim1; + d11 = a[i__1].r / d__; + tt = 1.f / (d11 * d22 - 1.f); + i__1 = k - 1 + k * a_dim1; + q__1.r = a[i__1].r / d__, q__1.i = a[i__1].i / d__; + d12.r = q__1.r, d12.i = q__1.i; + d__ = tt / d__; + + for (j = k - 2; j >= 1; --j) { + i__1 = j + (k - 1) * a_dim1; + q__3.r = d11 * a[i__1].r, q__3.i = d11 * a[i__1].i; + r_cnjg(&q__5, &d12); + i__2 = j + k * a_dim1; + q__4.r = q__5.r * a[i__2].r - q__5.i * a[i__2].i, + q__4.i = q__5.r * a[i__2].i + q__5.i * a[i__2] + .r; + q__2.r = q__3.r - q__4.r, q__2.i = q__3.i - q__4.i; + q__1.r = d__ * q__2.r, q__1.i = d__ * q__2.i; + wkm1.r = q__1.r, wkm1.i = q__1.i; + i__1 = j + k * a_dim1; + q__3.r = d22 * a[i__1].r, q__3.i = d22 * a[i__1].i; + i__2 = j + (k - 1) * a_dim1; + q__4.r = d12.r * a[i__2].r - d12.i * a[i__2].i, + q__4.i = d12.r * a[i__2].i + d12.i * a[i__2] + .r; + q__2.r = q__3.r - q__4.r, q__2.i = q__3.i - q__4.i; + q__1.r = d__ * q__2.r, q__1.i = d__ * q__2.i; + wk.r = q__1.r, wk.i = q__1.i; + for (i__ = j; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__ + j * a_dim1; + i__3 = i__ + k * a_dim1; + r_cnjg(&q__4, &wk); + q__3.r = a[i__3].r * q__4.r - a[i__3].i * q__4.i, + q__3.i = a[i__3].r * q__4.i + a[i__3].i * + q__4.r; + q__2.r = a[i__2].r - q__3.r, q__2.i = a[i__2].i - + q__3.i; + i__4 = i__ + (k - 1) * a_dim1; + r_cnjg(&q__6, &wkm1); + q__5.r = a[i__4].r * q__6.r - a[i__4].i * q__6.i, + q__5.i = a[i__4].r * q__6.i + a[i__4].i * + q__6.r; + q__1.r = q__2.r - q__5.r, q__1.i = q__2.i - + q__5.i; + a[i__1].r = q__1.r, a[i__1].i = q__1.i; +/* L30: */ + } + i__1 = j + k * a_dim1; + a[i__1].r = wk.r, a[i__1].i = wk.i; + i__1 = j + (k - 1) * a_dim1; + a[i__1].r = wkm1.r, a[i__1].i = wkm1.i; + i__1 = j + j * a_dim1; + i__2 = j + j * a_dim1; + r__1 = a[i__2].r; + q__1.r = r__1, q__1.i = 0.f; + a[i__1].r = q__1.r, a[i__1].i = q__1.i; +/* L40: */ + } + + } + + } + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + +/* Decrease K and return to the start of the main loop */ + + k -= kstep; + goto L10; + + } else { + +/* Factorize A as L*D*L**H using the lower triangle of A */ + +/* K is the main loop index, increasing from 1 to N in steps of */ +/* 1 or 2 */ + + k = 1; +L50: + +/* If K > N, exit from loop */ + + if (k > *n) { + goto L90; + } + kstep = 1; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + i__1 = k + k * a_dim1; + absakk = (r__1 = a[i__1].r, abs(r__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value. */ +/* Determine both COLMAX and IMAX. */ + + if (k < *n) { + i__1 = *n - k; + imax = k + icamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); + i__1 = imax + k * a_dim1; + colmax = (r__1 = a[i__1].r, abs(r__1)) + (r__2 = r_imag(&a[imax + + k * a_dim1]), abs(r__2)); + } else { + colmax = 0.f; + } + + if (f2cmax(absakk,colmax) == 0.f || sisnan_(&absakk)) { + +/* Column K is zero or underflow, contains a NaN: */ +/* set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + } else { + if (absakk >= alpha * colmax) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else { + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value */ + + i__1 = imax - k; + jmax = k - 1 + icamax_(&i__1, &a[imax + k * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (r__1 = a[i__1].r, abs(r__1)) + (r__2 = r_imag(&a[ + imax + jmax * a_dim1]), abs(r__2)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + icamax_(&i__1, &a[imax + 1 + imax * a_dim1], + &c__1); +/* Computing MAX */ + i__1 = jmax + imax * a_dim1; + r__3 = rowmax, r__4 = (r__1 = a[i__1].r, abs(r__1)) + ( + r__2 = r_imag(&a[jmax + imax * a_dim1]), abs(r__2) + ); + rowmax = f2cmax(r__3,r__4); + } + + if (absakk >= alpha * colmax * (colmax / rowmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + } else /* if(complicated condition) */ { + i__1 = imax + imax * a_dim1; + if ((r__1 = a[i__1].r, abs(r__1)) >= alpha * rowmax) { + +/* interchange rows and columns K and IMAX, use 1-by-1 */ +/* pivot block */ + + kp = imax; + } else { + +/* interchange rows and columns K+1 and IMAX, use 2-by-2 */ +/* pivot block */ + + kp = imax; + kstep = 2; + } + } + } + + kk = k + kstep - 1; + if (kp != kk) { + +/* Interchange rows and columns KK and KP in the trailing */ +/* submatrix A(k:n,k:n) */ + + if (kp < *n) { + i__1 = *n - kp; + cswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + + kp * a_dim1], &c__1); + } + i__1 = kp - 1; + for (j = kk + 1; j <= i__1; ++j) { + r_cnjg(&q__1, &a[j + kk * a_dim1]); + t.r = q__1.r, t.i = q__1.i; + i__2 = j + kk * a_dim1; + r_cnjg(&q__1, &a[kp + j * a_dim1]); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L60: */ + } + i__1 = kp + kk * a_dim1; + r_cnjg(&q__1, &a[kp + kk * a_dim1]); + a[i__1].r = q__1.r, a[i__1].i = q__1.i; + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.f; + if (kstep == 2) { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + i__1 = k + 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k + 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } else { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + if (kstep == 2) { + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + } + } + +/* Update the trailing submatrix */ + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k now holds */ + +/* W(k) = L(k)*D(k) */ + +/* where L(k) is the k-th column of L */ + + if (k < *n) { + +/* Perform a rank-1 update of A(k+1:n,k+1:n) as */ + +/* A := A - L(k)*D(k)*L(k)**H = A - W(k)*(1/D(k))*W(k)**H */ + + i__1 = k + k * a_dim1; + r1 = 1.f / a[i__1].r; + i__1 = *n - k; + r__1 = -r1; + cher_(uplo, &i__1, &r__1, &a[k + 1 + k * a_dim1], &c__1, & + a[k + 1 + (k + 1) * a_dim1], lda); + +/* Store L(k) in column K */ + + i__1 = *n - k; + csscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + +/* 2-by-2 pivot block D(k) */ + + if (k < *n - 1) { + +/* Perform a rank-2 update of A(k+2:n,k+2:n) as */ + +/* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )**H */ +/* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )**H */ + +/* where L(k) and L(k+1) are the k-th and (k+1)-th */ +/* columns of L */ + + i__1 = k + 1 + k * a_dim1; + r__1 = a[i__1].r; + r__2 = r_imag(&a[k + 1 + k * a_dim1]); + d__ = slapy2_(&r__1, &r__2); + i__1 = k + 1 + (k + 1) * a_dim1; + d11 = a[i__1].r / d__; + i__1 = k + k * a_dim1; + d22 = a[i__1].r / d__; + tt = 1.f / (d11 * d22 - 1.f); + i__1 = k + 1 + k * a_dim1; + q__1.r = a[i__1].r / d__, q__1.i = a[i__1].i / d__; + d21.r = q__1.r, d21.i = q__1.i; + d__ = tt / d__; + + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + q__3.r = d11 * a[i__2].r, q__3.i = d11 * a[i__2].i; + i__3 = j + (k + 1) * a_dim1; + q__4.r = d21.r * a[i__3].r - d21.i * a[i__3].i, + q__4.i = d21.r * a[i__3].i + d21.i * a[i__3] + .r; + q__2.r = q__3.r - q__4.r, q__2.i = q__3.i - q__4.i; + q__1.r = d__ * q__2.r, q__1.i = d__ * q__2.i; + wk.r = q__1.r, wk.i = q__1.i; + i__2 = j + (k + 1) * a_dim1; + q__3.r = d22 * a[i__2].r, q__3.i = d22 * a[i__2].i; + r_cnjg(&q__5, &d21); + i__3 = j + k * a_dim1; + q__4.r = q__5.r * a[i__3].r - q__5.i * a[i__3].i, + q__4.i = q__5.r * a[i__3].i + q__5.i * a[i__3] + .r; + q__2.r = q__3.r - q__4.r, q__2.i = q__3.i - q__4.i; + q__1.r = d__ * q__2.r, q__1.i = d__ * q__2.i; + wkp1.r = q__1.r, wkp1.i = q__1.i; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__ + k * a_dim1; + r_cnjg(&q__4, &wk); + q__3.r = a[i__5].r * q__4.r - a[i__5].i * q__4.i, + q__3.i = a[i__5].r * q__4.i + a[i__5].i * + q__4.r; + q__2.r = a[i__4].r - q__3.r, q__2.i = a[i__4].i - + q__3.i; + i__6 = i__ + (k + 1) * a_dim1; + r_cnjg(&q__6, &wkp1); + q__5.r = a[i__6].r * q__6.r - a[i__6].i * q__6.i, + q__5.i = a[i__6].r * q__6.i + a[i__6].i * + q__6.r; + q__1.r = q__2.r - q__5.r, q__1.i = q__2.i - + q__5.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L70: */ + } + i__2 = j + k * a_dim1; + a[i__2].r = wk.r, a[i__2].i = wk.i; + i__2 = j + (k + 1) * a_dim1; + a[i__2].r = wkp1.r, a[i__2].i = wkp1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + r__1 = a[i__3].r; + q__1.r = r__1, q__1.i = 0.f; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L80: */ + } + } + } + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + +/* Increase K and return to the start of the main loop */ + + k += kstep; + goto L50; + + } + +L90: + return 0; + +/* End of CHETF2 */ + +} /* chetf2_ */ + diff --git a/lapack-netlib/SRC/chetf2_rk.c b/lapack-netlib/SRC/chetf2_rk.c new file mode 100644 index 000000000..ffbb18a28 --- /dev/null +++ b/lapack-netlib/SRC/chetf2_rk.c @@ -0,0 +1,1721 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHETF2_RK computes the factorization of a complex Hermitian indefinite matrix using the bounded + Bunch-Kaufman (rook) diagonal pivoting method (BLAS2 unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHETF2_RK + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHETF2_RK( UPLO, N, A, LDA, E, IPIV, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* INTEGER IPIV( * ) */ +/* COMPLEX A( LDA, * ), E ( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > CHETF2_RK computes the factorization of a complex Hermitian matrix A */ +/* > using the bounded Bunch-Kaufman (rook) diagonal pivoting method: */ +/* > */ +/* > A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), */ +/* > */ +/* > where U (or L) is unit upper (or lower) triangular matrix, */ +/* > U**H (or L**H) is the conjugate of U (or L), P is a permutation */ +/* > matrix, P**T is the transpose of P, and D is Hermitian and block */ +/* > diagonal with 1-by-1 and 2-by-2 diagonal blocks. */ +/* > */ +/* > This is the unblocked version of the algorithm, calling Level 2 BLAS. */ +/* > For more information see Further Details section. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > Hermitian matrix A is stored: */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. */ +/* > If UPLO = 'U': the leading N-by-N upper triangular part */ +/* > of A contains the upper triangular part of the matrix A, */ +/* > and the strictly lower triangular part of A is not */ +/* > referenced. */ +/* > */ +/* > If UPLO = 'L': the leading N-by-N lower triangular part */ +/* > of A contains the lower triangular part of the matrix A, */ +/* > and the strictly upper triangular part of A is not */ +/* > referenced. */ +/* > */ +/* > On exit, contains: */ +/* > a) ONLY diagonal elements of the Hermitian block diagonal */ +/* > matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); */ +/* > (superdiagonal (or subdiagonal) elements of D */ +/* > are stored on exit in array E), and */ +/* > b) If UPLO = 'U': factor U in the superdiagonal part of A. */ +/* > If UPLO = 'L': factor L in the subdiagonal part of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is COMPLEX array, dimension (N) */ +/* > On exit, contains the superdiagonal (or subdiagonal) */ +/* > elements of the Hermitian block diagonal matrix D */ +/* > with 1-by-1 or 2-by-2 diagonal blocks, where */ +/* > If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; */ +/* > If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. */ +/* > */ +/* > NOTE: For 1-by-1 diagonal block D(k), where */ +/* > 1 <= k <= N, the element E(k) is set to 0 in both */ +/* > UPLO = 'U' or UPLO = 'L' cases. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > IPIV describes the permutation matrix P in the factorization */ +/* > of matrix A as follows. The absolute value of IPIV(k) */ +/* > represents the index of row and column that were */ +/* > interchanged with the k-th row and column. The value of UPLO */ +/* > describes the order in which the interchanges were applied. */ +/* > Also, the sign of IPIV represents the block structure of */ +/* > the Hermitian block diagonal matrix D with 1-by-1 or 2-by-2 */ +/* > diagonal blocks which correspond to 1 or 2 interchanges */ +/* > at each factorization step. For more info see Further */ +/* > Details section. */ +/* > */ +/* > If UPLO = 'U', */ +/* > ( in factorization order, k decreases from N to 1 ): */ +/* > a) A single positive entry IPIV(k) > 0 means: */ +/* > D(k,k) is a 1-by-1 diagonal block. */ +/* > If IPIV(k) != k, rows and columns k and IPIV(k) were */ +/* > interchanged in the matrix A(1:N,1:N); */ +/* > If IPIV(k) = k, no interchange occurred. */ +/* > */ +/* > b) A pair of consecutive negative entries */ +/* > IPIV(k) < 0 and IPIV(k-1) < 0 means: */ +/* > D(k-1:k,k-1:k) is a 2-by-2 diagonal block. */ +/* > (NOTE: negative entries in IPIV appear ONLY in pairs). */ +/* > 1) If -IPIV(k) != k, rows and columns */ +/* > k and -IPIV(k) were interchanged */ +/* > in the matrix A(1:N,1:N). */ +/* > If -IPIV(k) = k, no interchange occurred. */ +/* > 2) If -IPIV(k-1) != k-1, rows and columns */ +/* > k-1 and -IPIV(k-1) were interchanged */ +/* > in the matrix A(1:N,1:N). */ +/* > If -IPIV(k-1) = k-1, no interchange occurred. */ +/* > */ +/* > c) In both cases a) and b), always ABS( IPIV(k) ) <= k. */ +/* > */ +/* > d) NOTE: Any entry IPIV(k) is always NONZERO on output. */ +/* > */ +/* > If UPLO = 'L', */ +/* > ( in factorization order, k increases from 1 to N ): */ +/* > a) A single positive entry IPIV(k) > 0 means: */ +/* > D(k,k) is a 1-by-1 diagonal block. */ +/* > If IPIV(k) != k, rows and columns k and IPIV(k) were */ +/* > interchanged in the matrix A(1:N,1:N). */ +/* > If IPIV(k) = k, no interchange occurred. */ +/* > */ +/* > b) A pair of consecutive negative entries */ +/* > IPIV(k) < 0 and IPIV(k+1) < 0 means: */ +/* > D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ +/* > (NOTE: negative entries in IPIV appear ONLY in pairs). */ +/* > 1) If -IPIV(k) != k, rows and columns */ +/* > k and -IPIV(k) were interchanged */ +/* > in the matrix A(1:N,1:N). */ +/* > If -IPIV(k) = k, no interchange occurred. */ +/* > 2) If -IPIV(k+1) != k+1, rows and columns */ +/* > k-1 and -IPIV(k-1) were interchanged */ +/* > in the matrix A(1:N,1:N). */ +/* > If -IPIV(k+1) = k+1, no interchange occurred. */ +/* > */ +/* > c) In both cases a) and b), always ABS( IPIV(k) ) >= k. */ +/* > */ +/* > d) NOTE: Any entry IPIV(k) is always NONZERO on output. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > */ +/* > < 0: If INFO = -k, the k-th argument had an illegal value */ +/* > */ +/* > > 0: If INFO = k, the matrix A is singular, because: */ +/* > If UPLO = 'U': column k in the upper */ +/* > triangular part of A contains all zeros. */ +/* > If UPLO = 'L': column k in the lower */ +/* > triangular part of A contains all zeros. */ +/* > */ +/* > Therefore D(k,k) is exactly zero, and superdiagonal */ +/* > elements of column k of U (or subdiagonal elements of */ +/* > column k of L ) are all zeros. The factorization has */ +/* > been completed, but the block diagonal matrix D is */ +/* > exactly singular, and division by zero will occur if */ +/* > it is used to solve a system of equations. */ +/* > */ +/* > NOTE: INFO only stores the first occurrence of */ +/* > a singularity, any subsequent occurrence of singularity */ +/* > is not stored in INFO even though the factorization */ +/* > always completes. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexHEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > TODO: put further details */ +/* > \endverbatim */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > December 2016, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > */ +/* > September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */ +/* > School of Mathematics, */ +/* > University of Manchester */ +/* > */ +/* > 01-01-96 - Based on modifications by */ +/* > J. Lewis, Boeing Computer Services Company */ +/* > A. Petitet, Computer Science Dept., */ +/* > Univ. of Tenn., Knoxville abd , USA */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int chetf2_rk_(char *uplo, integer *n, complex *a, integer * + lda, complex *e, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + real r__1, r__2; + complex q__1, q__2, q__3, q__4, q__5, q__6, q__7, q__8; + + /* Local variables */ + extern /* Subroutine */ int cher_(char *, integer *, real *, complex *, + integer *, complex *, integer *); + logical done; + integer imax, jmax; + real d__; + integer i__, j, k, p; + complex t; + real alpha; + extern logical lsame_(char *, char *); + real sfmin; + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + integer itemp, kstep; + real stemp; + logical upper; + real r1, d11; + complex d12; + real d22; + complex d21; + extern real slapy2_(real *, real *); + integer ii, kk, kp; + real absakk; + complex wk; + extern integer icamax_(integer *, complex *, integer *); + extern real slamch_(char *); + real tt; + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), xerbla_(char *, integer *, ftnlen); + real colmax, rowmax; + complex wkm1, wkp1; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ====================================================================== */ + + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --e; + --ipiv; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHETF2_RK", &i__1, (ftnlen)9); + return 0; + } + +/* Initialize ALPHA for use in choosing pivot block size. */ + + alpha = (sqrt(17.f) + 1.f) / 8.f; + +/* Compute machine safe minimum */ + + sfmin = slamch_("S"); + + if (upper) { + +/* Factorize A as U*D*U**H using the upper triangle of A */ + +/* Initialize the first entry of array E, where superdiagonal */ +/* elements of D are stored */ + + e[1].r = 0.f, e[1].i = 0.f; + +/* K is the main loop index, decreasing from N to 1 in steps of */ +/* 1 or 2 */ + + k = *n; +L10: + +/* If K < 1, exit from loop */ + + if (k < 1) { + goto L34; + } + kstep = 1; + p = k; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + i__1 = k + k * a_dim1; + absakk = (r__1 = a[i__1].r, abs(r__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value. */ +/* Determine both COLMAX and IMAX. */ + + if (k > 1) { + i__1 = k - 1; + imax = icamax_(&i__1, &a[k * a_dim1 + 1], &c__1); + i__1 = imax + k * a_dim1; + colmax = (r__1 = a[i__1].r, abs(r__1)) + (r__2 = r_imag(&a[imax + + k * a_dim1]), abs(r__2)); + } else { + colmax = 0.f; + } + + if (f2cmax(absakk,colmax) == 0.f) { + +/* Column K is zero or underflow: set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + +/* Set E( K ) to zero */ + + if (k > 1) { + i__1 = k; + e[i__1].r = 0.f, e[i__1].i = 0.f; + } + + } else { + +/* ============================================================ */ + +/* BEGIN pivot search */ + +/* Case(1) */ +/* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX */ +/* (used to handle NaN and Inf) */ + + if (! (absakk < alpha * colmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + + } else { + + done = FALSE_; + +/* Loop until pivot found */ + +L12: + +/* BEGIN pivot search loop body */ + + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value. */ +/* Determine both ROWMAX and JMAX. */ + + if (imax != k) { + i__1 = k - imax; + jmax = imax + icamax_(&i__1, &a[imax + (imax + 1) * + a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (r__1 = a[i__1].r, abs(r__1)) + (r__2 = r_imag(& + a[imax + jmax * a_dim1]), abs(r__2)); + } else { + rowmax = 0.f; + } + + if (imax > 1) { + i__1 = imax - 1; + itemp = icamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); + i__1 = itemp + imax * a_dim1; + stemp = (r__1 = a[i__1].r, abs(r__1)) + (r__2 = r_imag(&a[ + itemp + imax * a_dim1]), abs(r__2)); + if (stemp > rowmax) { + rowmax = stemp; + jmax = itemp; + } + } + +/* Case(2) */ +/* Equivalent to testing for */ +/* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX */ +/* (used to handle NaN and Inf) */ + + i__1 = imax + imax * a_dim1; + if (! ((r__1 = a[i__1].r, abs(r__1)) < alpha * rowmax)) { + +/* interchange rows and columns K and IMAX, */ +/* use 1-by-1 pivot block */ + + kp = imax; + done = TRUE_; + +/* Case(3) */ +/* Equivalent to testing for ROWMAX.EQ.COLMAX, */ +/* (used to handle NaN and Inf) */ + + } else if (p == jmax || rowmax <= colmax) { + +/* interchange rows and columns K-1 and IMAX, */ +/* use 2-by-2 pivot block */ + + kp = imax; + kstep = 2; + done = TRUE_; + +/* Case(4) */ + } else { + +/* Pivot not found: set params and repeat */ + + p = imax; + colmax = rowmax; + imax = jmax; + } + +/* END pivot search loop body */ + + if (! done) { + goto L12; + } + + } + +/* END pivot search */ + +/* ============================================================ */ + +/* KK is the column of A where pivoting step stopped */ + + kk = k - kstep + 1; + +/* For only a 2x2 pivot, interchange rows and columns K and P */ +/* in the leading submatrix A(1:k,1:k) */ + + if (kstep == 2 && p != k) { +/* (1) Swap columnar parts */ + if (p > 1) { + i__1 = p - 1; + cswap_(&i__1, &a[k * a_dim1 + 1], &c__1, &a[p * a_dim1 + + 1], &c__1); + } +/* (2) Swap and conjugate middle parts */ + i__1 = k - 1; + for (j = p + 1; j <= i__1; ++j) { + r_cnjg(&q__1, &a[j + k * a_dim1]); + t.r = q__1.r, t.i = q__1.i; + i__2 = j + k * a_dim1; + r_cnjg(&q__1, &a[p + j * a_dim1]); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = p + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L14: */ + } +/* (3) Swap and conjugate corner elements at row-col interserction */ + i__1 = p + k * a_dim1; + r_cnjg(&q__1, &a[p + k * a_dim1]); + a[i__1].r = q__1.r, a[i__1].i = q__1.i; +/* (4) Swap diagonal elements at row-col intersection */ + i__1 = k + k * a_dim1; + r1 = a[i__1].r; + i__1 = k + k * a_dim1; + i__2 = p + p * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + i__1 = p + p * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.f; + +/* Convert upper triangle of A into U form by applying */ +/* the interchanges in columns k+1:N. */ + + if (k < *n) { + i__1 = *n - k; + cswap_(&i__1, &a[k + (k + 1) * a_dim1], lda, &a[p + (k + + 1) * a_dim1], lda); + } + + } + +/* For both 1x1 and 2x2 pivots, interchange rows and */ +/* columns KK and KP in the leading submatrix A(1:k,1:k) */ + + if (kp != kk) { +/* (1) Swap columnar parts */ + if (kp > 1) { + i__1 = kp - 1; + cswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + + 1], &c__1); + } +/* (2) Swap and conjugate middle parts */ + i__1 = kk - 1; + for (j = kp + 1; j <= i__1; ++j) { + r_cnjg(&q__1, &a[j + kk * a_dim1]); + t.r = q__1.r, t.i = q__1.i; + i__2 = j + kk * a_dim1; + r_cnjg(&q__1, &a[kp + j * a_dim1]); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L15: */ + } +/* (3) Swap and conjugate corner elements at row-col interserction */ + i__1 = kp + kk * a_dim1; + r_cnjg(&q__1, &a[kp + kk * a_dim1]); + a[i__1].r = q__1.r, a[i__1].i = q__1.i; +/* (4) Swap diagonal elements at row-col intersection */ + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.f; + + if (kstep == 2) { +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; +/* (5) Swap row elements */ + i__1 = k - 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k - 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + +/* Convert upper triangle of A into U form by applying */ +/* the interchanges in columns k+1:N. */ + + if (k < *n) { + i__1 = *n - k; + cswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k + + 1) * a_dim1], lda); + } + + } else { +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + if (kstep == 2) { + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (k - 1) * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + } + } + +/* Update the leading submatrix */ + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k now holds */ + +/* W(k) = U(k)*D(k) */ + +/* where U(k) is the k-th column of U */ + + if (k > 1) { + +/* Perform a rank-1 update of A(1:k-1,1:k-1) and */ +/* store U(k) in column k */ + + i__1 = k + k * a_dim1; + if ((r__1 = a[i__1].r, abs(r__1)) >= sfmin) { + +/* Perform a rank-1 update of A(1:k-1,1:k-1) as */ +/* A := A - U(k)*D(k)*U(k)**T */ +/* = A - W(k)*1/D(k)*W(k)**T */ + + i__1 = k + k * a_dim1; + d11 = 1.f / a[i__1].r; + i__1 = k - 1; + r__1 = -d11; + cher_(uplo, &i__1, &r__1, &a[k * a_dim1 + 1], &c__1, & + a[a_offset], lda); + +/* Store U(k) in column k */ + + i__1 = k - 1; + csscal_(&i__1, &d11, &a[k * a_dim1 + 1], &c__1); + } else { + +/* Store L(k) in column K */ + + i__1 = k + k * a_dim1; + d11 = a[i__1].r; + i__1 = k - 1; + for (ii = 1; ii <= i__1; ++ii) { + i__2 = ii + k * a_dim1; + i__3 = ii + k * a_dim1; + q__1.r = a[i__3].r / d11, q__1.i = a[i__3].i / + d11; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L16: */ + } + +/* Perform a rank-1 update of A(k+1:n,k+1:n) as */ +/* A := A - U(k)*D(k)*U(k)**T */ +/* = A - W(k)*(1/D(k))*W(k)**T */ +/* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T */ + + i__1 = k - 1; + r__1 = -d11; + cher_(uplo, &i__1, &r__1, &a[k * a_dim1 + 1], &c__1, & + a[a_offset], lda); + } + +/* Store the superdiagonal element of D in array E */ + + i__1 = k; + e[i__1].r = 0.f, e[i__1].i = 0.f; + + } + + } else { + +/* 2-by-2 pivot block D(k): columns k and k-1 now hold */ + +/* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) */ + +/* where U(k) and U(k-1) are the k-th and (k-1)-th columns */ +/* of U */ + +/* Perform a rank-2 update of A(1:k-2,1:k-2) as */ + +/* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T */ +/* = A - ( ( A(k-1)A(k) )*inv(D(k)) ) * ( A(k-1)A(k) )**T */ + +/* and store L(k) and L(k+1) in columns k and k+1 */ + + if (k > 2) { +/* D = |A12| */ + i__1 = k - 1 + k * a_dim1; + r__1 = a[i__1].r; + r__2 = r_imag(&a[k - 1 + k * a_dim1]); + d__ = slapy2_(&r__1, &r__2); + i__1 = k + k * a_dim1; + q__1.r = a[i__1].r / d__, q__1.i = a[i__1].i / d__; + d11 = q__1.r; + i__1 = k - 1 + (k - 1) * a_dim1; + q__1.r = a[i__1].r / d__, q__1.i = a[i__1].i / d__; + d22 = q__1.r; + i__1 = k - 1 + k * a_dim1; + q__1.r = a[i__1].r / d__, q__1.i = a[i__1].i / d__; + d12.r = q__1.r, d12.i = q__1.i; + tt = 1.f / (d11 * d22 - 1.f); + + for (j = k - 2; j >= 1; --j) { + +/* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J */ + + i__1 = j + (k - 1) * a_dim1; + q__3.r = d11 * a[i__1].r, q__3.i = d11 * a[i__1].i; + r_cnjg(&q__5, &d12); + i__2 = j + k * a_dim1; + q__4.r = q__5.r * a[i__2].r - q__5.i * a[i__2].i, + q__4.i = q__5.r * a[i__2].i + q__5.i * a[i__2] + .r; + q__2.r = q__3.r - q__4.r, q__2.i = q__3.i - q__4.i; + q__1.r = tt * q__2.r, q__1.i = tt * q__2.i; + wkm1.r = q__1.r, wkm1.i = q__1.i; + i__1 = j + k * a_dim1; + q__3.r = d22 * a[i__1].r, q__3.i = d22 * a[i__1].i; + i__2 = j + (k - 1) * a_dim1; + q__4.r = d12.r * a[i__2].r - d12.i * a[i__2].i, + q__4.i = d12.r * a[i__2].i + d12.i * a[i__2] + .r; + q__2.r = q__3.r - q__4.r, q__2.i = q__3.i - q__4.i; + q__1.r = tt * q__2.r, q__1.i = tt * q__2.i; + wk.r = q__1.r, wk.i = q__1.i; + +/* Perform a rank-2 update of A(1:k-2,1:k-2) */ + + for (i__ = j; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__ + j * a_dim1; + i__3 = i__ + k * a_dim1; + q__4.r = a[i__3].r / d__, q__4.i = a[i__3].i / + d__; + r_cnjg(&q__5, &wk); + q__3.r = q__4.r * q__5.r - q__4.i * q__5.i, + q__3.i = q__4.r * q__5.i + q__4.i * + q__5.r; + q__2.r = a[i__2].r - q__3.r, q__2.i = a[i__2].i - + q__3.i; + i__4 = i__ + (k - 1) * a_dim1; + q__7.r = a[i__4].r / d__, q__7.i = a[i__4].i / + d__; + r_cnjg(&q__8, &wkm1); + q__6.r = q__7.r * q__8.r - q__7.i * q__8.i, + q__6.i = q__7.r * q__8.i + q__7.i * + q__8.r; + q__1.r = q__2.r - q__6.r, q__1.i = q__2.i - + q__6.i; + a[i__1].r = q__1.r, a[i__1].i = q__1.i; +/* L20: */ + } + +/* Store U(k) and U(k-1) in cols k and k-1 for row J */ + + i__1 = j + k * a_dim1; + q__1.r = wk.r / d__, q__1.i = wk.i / d__; + a[i__1].r = q__1.r, a[i__1].i = q__1.i; + i__1 = j + (k - 1) * a_dim1; + q__1.r = wkm1.r / d__, q__1.i = wkm1.i / d__; + a[i__1].r = q__1.r, a[i__1].i = q__1.i; +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = j + j * a_dim1; + i__2 = j + j * a_dim1; + r__1 = a[i__2].r; + q__1.r = r__1, q__1.i = 0.f; + a[i__1].r = q__1.r, a[i__1].i = q__1.i; + +/* L30: */ + } + + } + +/* Copy superdiagonal elements of D(K) to E(K) and */ +/* ZERO out superdiagonal entry of A */ + + i__1 = k; + i__2 = k - 1 + k * a_dim1; + e[i__1].r = a[i__2].r, e[i__1].i = a[i__2].i; + i__1 = k - 1; + e[i__1].r = 0.f, e[i__1].i = 0.f; + i__1 = k - 1 + k * a_dim1; + a[i__1].r = 0.f, a[i__1].i = 0.f; + + } + +/* End column K is nonsingular */ + + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -p; + ipiv[k - 1] = -kp; + } + +/* Decrease K and return to the start of the main loop */ + + k -= kstep; + goto L10; + +L34: + + ; + } else { + +/* Factorize A as L*D*L**H using the lower triangle of A */ + +/* Initialize the unused last entry of the subdiagonal array E. */ + + i__1 = *n; + e[i__1].r = 0.f, e[i__1].i = 0.f; + +/* K is the main loop index, increasing from 1 to N in steps of */ +/* 1 or 2 */ + + k = 1; +L40: + +/* If K > N, exit from loop */ + + if (k > *n) { + goto L64; + } + kstep = 1; + p = k; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + i__1 = k + k * a_dim1; + absakk = (r__1 = a[i__1].r, abs(r__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value. */ +/* Determine both COLMAX and IMAX. */ + + if (k < *n) { + i__1 = *n - k; + imax = k + icamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); + i__1 = imax + k * a_dim1; + colmax = (r__1 = a[i__1].r, abs(r__1)) + (r__2 = r_imag(&a[imax + + k * a_dim1]), abs(r__2)); + } else { + colmax = 0.f; + } + + if (f2cmax(absakk,colmax) == 0.f) { + +/* Column K is zero or underflow: set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + +/* Set E( K ) to zero */ + + if (k < *n) { + i__1 = k; + e[i__1].r = 0.f, e[i__1].i = 0.f; + } + + } else { + +/* ============================================================ */ + +/* BEGIN pivot search */ + +/* Case(1) */ +/* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX */ +/* (used to handle NaN and Inf) */ + + if (! (absakk < alpha * colmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + + } else { + + done = FALSE_; + +/* Loop until pivot found */ + +L42: + +/* BEGIN pivot search loop body */ + + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value. */ +/* Determine both ROWMAX and JMAX. */ + + if (imax != k) { + i__1 = imax - k; + jmax = k - 1 + icamax_(&i__1, &a[imax + k * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (r__1 = a[i__1].r, abs(r__1)) + (r__2 = r_imag(& + a[imax + jmax * a_dim1]), abs(r__2)); + } else { + rowmax = 0.f; + } + + if (imax < *n) { + i__1 = *n - imax; + itemp = imax + icamax_(&i__1, &a[imax + 1 + imax * a_dim1] + , &c__1); + i__1 = itemp + imax * a_dim1; + stemp = (r__1 = a[i__1].r, abs(r__1)) + (r__2 = r_imag(&a[ + itemp + imax * a_dim1]), abs(r__2)); + if (stemp > rowmax) { + rowmax = stemp; + jmax = itemp; + } + } + +/* Case(2) */ +/* Equivalent to testing for */ +/* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX */ +/* (used to handle NaN and Inf) */ + + i__1 = imax + imax * a_dim1; + if (! ((r__1 = a[i__1].r, abs(r__1)) < alpha * rowmax)) { + +/* interchange rows and columns K and IMAX, */ +/* use 1-by-1 pivot block */ + + kp = imax; + done = TRUE_; + +/* Case(3) */ +/* Equivalent to testing for ROWMAX.EQ.COLMAX, */ +/* (used to handle NaN and Inf) */ + + } else if (p == jmax || rowmax <= colmax) { + +/* interchange rows and columns K+1 and IMAX, */ +/* use 2-by-2 pivot block */ + + kp = imax; + kstep = 2; + done = TRUE_; + +/* Case(4) */ + } else { + +/* Pivot not found: set params and repeat */ + + p = imax; + colmax = rowmax; + imax = jmax; + } + + +/* END pivot search loop body */ + + if (! done) { + goto L42; + } + + } + +/* END pivot search */ + +/* ============================================================ */ + +/* KK is the column of A where pivoting step stopped */ + + kk = k + kstep - 1; + +/* For only a 2x2 pivot, interchange rows and columns K and P */ +/* in the trailing submatrix A(k:n,k:n) */ + + if (kstep == 2 && p != k) { +/* (1) Swap columnar parts */ + if (p < *n) { + i__1 = *n - p; + cswap_(&i__1, &a[p + 1 + k * a_dim1], &c__1, &a[p + 1 + p + * a_dim1], &c__1); + } +/* (2) Swap and conjugate middle parts */ + i__1 = p - 1; + for (j = k + 1; j <= i__1; ++j) { + r_cnjg(&q__1, &a[j + k * a_dim1]); + t.r = q__1.r, t.i = q__1.i; + i__2 = j + k * a_dim1; + r_cnjg(&q__1, &a[p + j * a_dim1]); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = p + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L44: */ + } +/* (3) Swap and conjugate corner elements at row-col interserction */ + i__1 = p + k * a_dim1; + r_cnjg(&q__1, &a[p + k * a_dim1]); + a[i__1].r = q__1.r, a[i__1].i = q__1.i; +/* (4) Swap diagonal elements at row-col intersection */ + i__1 = k + k * a_dim1; + r1 = a[i__1].r; + i__1 = k + k * a_dim1; + i__2 = p + p * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + i__1 = p + p * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.f; + +/* Convert lower triangle of A into L form by applying */ +/* the interchanges in columns 1:k-1. */ + + if (k > 1) { + i__1 = k - 1; + cswap_(&i__1, &a[k + a_dim1], lda, &a[p + a_dim1], lda); + } + + } + +/* For both 1x1 and 2x2 pivots, interchange rows and */ +/* columns KK and KP in the trailing submatrix A(k:n,k:n) */ + + if (kp != kk) { +/* (1) Swap columnar parts */ + if (kp < *n) { + i__1 = *n - kp; + cswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + + kp * a_dim1], &c__1); + } +/* (2) Swap and conjugate middle parts */ + i__1 = kp - 1; + for (j = kk + 1; j <= i__1; ++j) { + r_cnjg(&q__1, &a[j + kk * a_dim1]); + t.r = q__1.r, t.i = q__1.i; + i__2 = j + kk * a_dim1; + r_cnjg(&q__1, &a[kp + j * a_dim1]); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L45: */ + } +/* (3) Swap and conjugate corner elements at row-col interserction */ + i__1 = kp + kk * a_dim1; + r_cnjg(&q__1, &a[kp + kk * a_dim1]); + a[i__1].r = q__1.r, a[i__1].i = q__1.i; +/* (4) Swap diagonal elements at row-col intersection */ + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.f; + + if (kstep == 2) { +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; +/* (5) Swap row elements */ + i__1 = k + 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k + 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + +/* Convert lower triangle of A into L form by applying */ +/* the interchanges in columns 1:k-1. */ + + if (k > 1) { + i__1 = k - 1; + cswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda); + } + + } else { +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + if (kstep == 2) { + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + } + } + +/* Update the trailing submatrix */ + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k of A now holds */ + +/* W(k) = L(k)*D(k), */ + +/* where L(k) is the k-th column of L */ + + if (k < *n) { + +/* Perform a rank-1 update of A(k+1:n,k+1:n) and */ +/* store L(k) in column k */ + +/* Handle division by a small number */ + + i__1 = k + k * a_dim1; + if ((r__1 = a[i__1].r, abs(r__1)) >= sfmin) { + +/* Perform a rank-1 update of A(k+1:n,k+1:n) as */ +/* A := A - L(k)*D(k)*L(k)**T */ +/* = A - W(k)*(1/D(k))*W(k)**T */ + + i__1 = k + k * a_dim1; + d11 = 1.f / a[i__1].r; + i__1 = *n - k; + r__1 = -d11; + cher_(uplo, &i__1, &r__1, &a[k + 1 + k * a_dim1], & + c__1, &a[k + 1 + (k + 1) * a_dim1], lda); + +/* Store L(k) in column k */ + + i__1 = *n - k; + csscal_(&i__1, &d11, &a[k + 1 + k * a_dim1], &c__1); + } else { + +/* Store L(k) in column k */ + + i__1 = k + k * a_dim1; + d11 = a[i__1].r; + i__1 = *n; + for (ii = k + 1; ii <= i__1; ++ii) { + i__2 = ii + k * a_dim1; + i__3 = ii + k * a_dim1; + q__1.r = a[i__3].r / d11, q__1.i = a[i__3].i / + d11; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L46: */ + } + +/* Perform a rank-1 update of A(k+1:n,k+1:n) as */ +/* A := A - L(k)*D(k)*L(k)**T */ +/* = A - W(k)*(1/D(k))*W(k)**T */ +/* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T */ + + i__1 = *n - k; + r__1 = -d11; + cher_(uplo, &i__1, &r__1, &a[k + 1 + k * a_dim1], & + c__1, &a[k + 1 + (k + 1) * a_dim1], lda); + } + +/* Store the subdiagonal element of D in array E */ + + i__1 = k; + e[i__1].r = 0.f, e[i__1].i = 0.f; + + } + + } else { + +/* 2-by-2 pivot block D(k): columns k and k+1 now hold */ + +/* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) */ + +/* where L(k) and L(k+1) are the k-th and (k+1)-th columns */ +/* of L */ + + +/* Perform a rank-2 update of A(k+2:n,k+2:n) as */ + +/* A := A - ( L(k) L(k+1) ) * D(k) * ( L(k) L(k+1) )**T */ +/* = A - ( ( A(k)A(k+1) )*inv(D(k) ) * ( A(k)A(k+1) )**T */ + +/* and store L(k) and L(k+1) in columns k and k+1 */ + + if (k < *n - 1) { +/* D = |A21| */ + i__1 = k + 1 + k * a_dim1; + r__1 = a[i__1].r; + r__2 = r_imag(&a[k + 1 + k * a_dim1]); + d__ = slapy2_(&r__1, &r__2); + i__1 = k + 1 + (k + 1) * a_dim1; + d11 = a[i__1].r / d__; + i__1 = k + k * a_dim1; + d22 = a[i__1].r / d__; + i__1 = k + 1 + k * a_dim1; + q__1.r = a[i__1].r / d__, q__1.i = a[i__1].i / d__; + d21.r = q__1.r, d21.i = q__1.i; + tt = 1.f / (d11 * d22 - 1.f); + + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + +/* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J */ + + i__2 = j + k * a_dim1; + q__3.r = d11 * a[i__2].r, q__3.i = d11 * a[i__2].i; + i__3 = j + (k + 1) * a_dim1; + q__4.r = d21.r * a[i__3].r - d21.i * a[i__3].i, + q__4.i = d21.r * a[i__3].i + d21.i * a[i__3] + .r; + q__2.r = q__3.r - q__4.r, q__2.i = q__3.i - q__4.i; + q__1.r = tt * q__2.r, q__1.i = tt * q__2.i; + wk.r = q__1.r, wk.i = q__1.i; + i__2 = j + (k + 1) * a_dim1; + q__3.r = d22 * a[i__2].r, q__3.i = d22 * a[i__2].i; + r_cnjg(&q__5, &d21); + i__3 = j + k * a_dim1; + q__4.r = q__5.r * a[i__3].r - q__5.i * a[i__3].i, + q__4.i = q__5.r * a[i__3].i + q__5.i * a[i__3] + .r; + q__2.r = q__3.r - q__4.r, q__2.i = q__3.i - q__4.i; + q__1.r = tt * q__2.r, q__1.i = tt * q__2.i; + wkp1.r = q__1.r, wkp1.i = q__1.i; + +/* Perform a rank-2 update of A(k+2:n,k+2:n) */ + + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__ + k * a_dim1; + q__4.r = a[i__5].r / d__, q__4.i = a[i__5].i / + d__; + r_cnjg(&q__5, &wk); + q__3.r = q__4.r * q__5.r - q__4.i * q__5.i, + q__3.i = q__4.r * q__5.i + q__4.i * + q__5.r; + q__2.r = a[i__4].r - q__3.r, q__2.i = a[i__4].i - + q__3.i; + i__6 = i__ + (k + 1) * a_dim1; + q__7.r = a[i__6].r / d__, q__7.i = a[i__6].i / + d__; + r_cnjg(&q__8, &wkp1); + q__6.r = q__7.r * q__8.r - q__7.i * q__8.i, + q__6.i = q__7.r * q__8.i + q__7.i * + q__8.r; + q__1.r = q__2.r - q__6.r, q__1.i = q__2.i - + q__6.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L50: */ + } + +/* Store L(k) and L(k+1) in cols k and k+1 for row J */ + + i__2 = j + k * a_dim1; + q__1.r = wk.r / d__, q__1.i = wk.i / d__; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = j + (k + 1) * a_dim1; + q__1.r = wkp1.r / d__, q__1.i = wkp1.i / d__; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* (*) Make sure that diagonal element of pivot is real */ + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + r__1 = a[i__3].r; + q__1.r = r__1, q__1.i = 0.f; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + +/* L60: */ + } + + } + +/* Copy subdiagonal elements of D(K) to E(K) and */ +/* ZERO out subdiagonal entry of A */ + + i__1 = k; + i__2 = k + 1 + k * a_dim1; + e[i__1].r = a[i__2].r, e[i__1].i = a[i__2].i; + i__1 = k + 1; + e[i__1].r = 0.f, e[i__1].i = 0.f; + i__1 = k + 1 + k * a_dim1; + a[i__1].r = 0.f, a[i__1].i = 0.f; + + } + +/* End column K is nonsingular */ + + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -p; + ipiv[k + 1] = -kp; + } + +/* Increase K and return to the start of the main loop */ + + k += kstep; + goto L40; + +L64: + + ; + } + + return 0; + +/* End of CHETF2_RK */ + +} /* chetf2_rk__ */ + diff --git a/lapack-netlib/SRC/chetf2_rook.c b/lapack-netlib/SRC/chetf2_rook.c new file mode 100644 index 000000000..afc136641 --- /dev/null +++ b/lapack-netlib/SRC/chetf2_rook.c @@ -0,0 +1,1567 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHETF2_ROOK computes the factorization of a complex Hermitian indefinite matrix using the bound +ed Bunch-Kaufman ("rook") diagonal pivoting method (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHETF2_ROOK + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHETF2_ROOK( UPLO, N, A, LDA, IPIV, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* INTEGER IPIV( * ) */ +/* COMPLEX A( LDA, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHETF2_ROOK computes the factorization of a complex Hermitian matrix A */ +/* > using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: */ +/* > */ +/* > A = U*D*U**H or A = L*D*L**H */ +/* > */ +/* > where U (or L) is a product of permutation and unit upper (lower) */ +/* > triangular matrices, U**H is the conjugate transpose of U, and D is */ +/* > Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. */ +/* > */ +/* > This is the unblocked version of the algorithm, calling Level 2 BLAS. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > Hermitian matrix A is stored: */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > n-by-n upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading n-by-n lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, the block diagonal matrix D and the multipliers used */ +/* > to obtain the factor U or L (see below for further details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > Details of the interchanges and the block structure of D. */ +/* > */ +/* > If UPLO = 'U': */ +/* > If IPIV(k) > 0, then rows and columns k and IPIV(k) were */ +/* > interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* > */ +/* > If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and */ +/* > columns k and -IPIV(k) were interchanged and rows and */ +/* > columns k-1 and -IPIV(k-1) were inerchaged, */ +/* > D(k-1:k,k-1:k) is a 2-by-2 diagonal block. */ +/* > */ +/* > If UPLO = 'L': */ +/* > If IPIV(k) > 0, then rows and columns k and IPIV(k) */ +/* > were interchanged and D(k,k) is a 1-by-1 diagonal block. */ +/* > */ +/* > If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and */ +/* > columns k and -IPIV(k) were interchanged and rows and */ +/* > columns k+1 and -IPIV(k+1) were inerchaged, */ +/* > D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > > 0: if INFO = k, D(k,k) is exactly zero. The factorization */ +/* > has been completed, but the block diagonal matrix D is */ +/* > exactly singular, and division by zero will occur if it */ +/* > is used to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2013 */ + +/* > \ingroup complexHEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > If UPLO = 'U', then A = U*D*U**H, where */ +/* > U = P(n)*U(n)* ... *P(k)U(k)* ..., */ +/* > i.e., U is a product of terms P(k)*U(k), where k decreases from n to */ +/* > 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ +/* > and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ +/* > defined by IPIV(k), and U(k) is a unit upper triangular matrix, such */ +/* > that if the diagonal block D(k) is of order s (s = 1 or 2), then */ +/* > */ +/* > ( I v 0 ) k-s */ +/* > U(k) = ( 0 I 0 ) s */ +/* > ( 0 0 I ) n-k */ +/* > k-s s n-k */ +/* > */ +/* > If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). */ +/* > If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), */ +/* > and A(k,k), and v overwrites A(1:k-2,k-1:k). */ +/* > */ +/* > If UPLO = 'L', then A = L*D*L**H, where */ +/* > L = P(1)*L(1)* ... *P(k)*L(k)* ..., */ +/* > i.e., L is a product of terms P(k)*L(k), where k increases from 1 to */ +/* > n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 */ +/* > and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as */ +/* > defined by IPIV(k), and L(k) is a unit lower triangular matrix, such */ +/* > that if the diagonal block D(k) is of order s (s = 1 or 2), then */ +/* > */ +/* > ( I 0 0 ) k-1 */ +/* > L(k) = ( 0 I 0 ) s */ +/* > ( 0 v I ) n-k-s+1 */ +/* > k-1 s n-k-s+1 */ +/* > */ +/* > If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). */ +/* > If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), */ +/* > and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). */ +/* > \endverbatim */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > November 2013, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > */ +/* > September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */ +/* > School of Mathematics, */ +/* > University of Manchester */ +/* > */ +/* > 01-01-96 - Based on modifications by */ +/* > J. Lewis, Boeing Computer Services Company */ +/* > A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ +/* > \endverbatim */ + +/* ===================================================================== */ +/* Subroutine */ int chetf2_rook_(char *uplo, integer *n, complex *a, + integer *lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + real r__1, r__2; + complex q__1, q__2, q__3, q__4, q__5, q__6, q__7, q__8; + + /* Local variables */ + extern /* Subroutine */ int cher_(char *, integer *, real *, complex *, + integer *, complex *, integer *); + logical done; + integer imax, jmax; + real d__; + integer i__, j, k, p; + complex t; + real alpha; + extern logical lsame_(char *, char *); + real sfmin; + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + integer itemp, kstep; + real stemp; + logical upper; + real r1, d11; + complex d12; + real d22; + complex d21; + extern real slapy2_(real *, real *); + integer ii, kk, kp; + real absakk; + complex wk; + extern integer icamax_(integer *, complex *, integer *); + extern real slamch_(char *); + real tt; + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), xerbla_(char *, integer *, ftnlen); + real colmax, rowmax; + complex wkm1, wkp1; + + +/* -- LAPACK computational routine (version 3.5.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2013 */ + + +/* ====================================================================== */ + + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHETF2_ROOK", &i__1, (ftnlen)11); + return 0; + } + +/* Initialize ALPHA for use in choosing pivot block size. */ + + alpha = (sqrt(17.f) + 1.f) / 8.f; + +/* Compute machine safe minimum */ + + sfmin = slamch_("S"); + + if (upper) { + +/* Factorize A as U*D*U**H using the upper triangle of A */ + +/* K is the main loop index, decreasing from N to 1 in steps of */ +/* 1 or 2 */ + + k = *n; +L10: + +/* If K < 1, exit from loop */ + + if (k < 1) { + goto L70; + } + kstep = 1; + p = k; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + i__1 = k + k * a_dim1; + absakk = (r__1 = a[i__1].r, abs(r__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value. */ +/* Determine both COLMAX and IMAX. */ + + if (k > 1) { + i__1 = k - 1; + imax = icamax_(&i__1, &a[k * a_dim1 + 1], &c__1); + i__1 = imax + k * a_dim1; + colmax = (r__1 = a[i__1].r, abs(r__1)) + (r__2 = r_imag(&a[imax + + k * a_dim1]), abs(r__2)); + } else { + colmax = 0.f; + } + + if (f2cmax(absakk,colmax) == 0.f) { + +/* Column K is zero or underflow: set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + } else { + +/* ============================================================ */ + +/* BEGIN pivot search */ + +/* Case(1) */ +/* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX */ +/* (used to handle NaN and Inf) */ + + if (! (absakk < alpha * colmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + + } else { + + done = FALSE_; + +/* Loop until pivot found */ + +L12: + +/* BEGIN pivot search loop body */ + + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value. */ +/* Determine both ROWMAX and JMAX. */ + + if (imax != k) { + i__1 = k - imax; + jmax = imax + icamax_(&i__1, &a[imax + (imax + 1) * + a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (r__1 = a[i__1].r, abs(r__1)) + (r__2 = r_imag(& + a[imax + jmax * a_dim1]), abs(r__2)); + } else { + rowmax = 0.f; + } + + if (imax > 1) { + i__1 = imax - 1; + itemp = icamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); + i__1 = itemp + imax * a_dim1; + stemp = (r__1 = a[i__1].r, abs(r__1)) + (r__2 = r_imag(&a[ + itemp + imax * a_dim1]), abs(r__2)); + if (stemp > rowmax) { + rowmax = stemp; + jmax = itemp; + } + } + +/* Case(2) */ +/* Equivalent to testing for */ +/* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX */ +/* (used to handle NaN and Inf) */ + + i__1 = imax + imax * a_dim1; + if (! ((r__1 = a[i__1].r, abs(r__1)) < alpha * rowmax)) { + +/* interchange rows and columns K and IMAX, */ +/* use 1-by-1 pivot block */ + + kp = imax; + done = TRUE_; + +/* Case(3) */ +/* Equivalent to testing for ROWMAX.EQ.COLMAX, */ +/* (used to handle NaN and Inf) */ + + } else if (p == jmax || rowmax <= colmax) { + +/* interchange rows and columns K-1 and IMAX, */ +/* use 2-by-2 pivot block */ + + kp = imax; + kstep = 2; + done = TRUE_; + +/* Case(4) */ + } else { + +/* Pivot not found: set params and repeat */ + + p = imax; + colmax = rowmax; + imax = jmax; + } + +/* END pivot search loop body */ + + if (! done) { + goto L12; + } + + } + +/* END pivot search */ + +/* ============================================================ */ + +/* KK is the column of A where pivoting step stopped */ + + kk = k - kstep + 1; + +/* For only a 2x2 pivot, interchange rows and columns K and P */ +/* in the leading submatrix A(1:k,1:k) */ + + if (kstep == 2 && p != k) { +/* (1) Swap columnar parts */ + if (p > 1) { + i__1 = p - 1; + cswap_(&i__1, &a[k * a_dim1 + 1], &c__1, &a[p * a_dim1 + + 1], &c__1); + } +/* (2) Swap and conjugate middle parts */ + i__1 = k - 1; + for (j = p + 1; j <= i__1; ++j) { + r_cnjg(&q__1, &a[j + k * a_dim1]); + t.r = q__1.r, t.i = q__1.i; + i__2 = j + k * a_dim1; + r_cnjg(&q__1, &a[p + j * a_dim1]); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = p + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L14: */ + } +/* (3) Swap and conjugate corner elements at row-col interserction */ + i__1 = p + k * a_dim1; + r_cnjg(&q__1, &a[p + k * a_dim1]); + a[i__1].r = q__1.r, a[i__1].i = q__1.i; +/* (4) Swap diagonal elements at row-col intersection */ + i__1 = k + k * a_dim1; + r1 = a[i__1].r; + i__1 = k + k * a_dim1; + i__2 = p + p * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + i__1 = p + p * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.f; + } + +/* For both 1x1 and 2x2 pivots, interchange rows and */ +/* columns KK and KP in the leading submatrix A(1:k,1:k) */ + + if (kp != kk) { +/* (1) Swap columnar parts */ + if (kp > 1) { + i__1 = kp - 1; + cswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + + 1], &c__1); + } +/* (2) Swap and conjugate middle parts */ + i__1 = kk - 1; + for (j = kp + 1; j <= i__1; ++j) { + r_cnjg(&q__1, &a[j + kk * a_dim1]); + t.r = q__1.r, t.i = q__1.i; + i__2 = j + kk * a_dim1; + r_cnjg(&q__1, &a[kp + j * a_dim1]); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L15: */ + } +/* (3) Swap and conjugate corner elements at row-col interserction */ + i__1 = kp + kk * a_dim1; + r_cnjg(&q__1, &a[kp + kk * a_dim1]); + a[i__1].r = q__1.r, a[i__1].i = q__1.i; +/* (4) Swap diagonal elements at row-col intersection */ + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.f; + + if (kstep == 2) { +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; +/* (5) Swap row elements */ + i__1 = k - 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k - 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } else { +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + if (kstep == 2) { + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (k - 1) * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + } + } + +/* Update the leading submatrix */ + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k now holds */ + +/* W(k) = U(k)*D(k) */ + +/* where U(k) is the k-th column of U */ + + if (k > 1) { + +/* Perform a rank-1 update of A(1:k-1,1:k-1) and */ +/* store U(k) in column k */ + + i__1 = k + k * a_dim1; + if ((r__1 = a[i__1].r, abs(r__1)) >= sfmin) { + +/* Perform a rank-1 update of A(1:k-1,1:k-1) as */ +/* A := A - U(k)*D(k)*U(k)**T */ +/* = A - W(k)*1/D(k)*W(k)**T */ + + i__1 = k + k * a_dim1; + d11 = 1.f / a[i__1].r; + i__1 = k - 1; + r__1 = -d11; + cher_(uplo, &i__1, &r__1, &a[k * a_dim1 + 1], &c__1, & + a[a_offset], lda); + +/* Store U(k) in column k */ + + i__1 = k - 1; + csscal_(&i__1, &d11, &a[k * a_dim1 + 1], &c__1); + } else { + +/* Store L(k) in column K */ + + i__1 = k + k * a_dim1; + d11 = a[i__1].r; + i__1 = k - 1; + for (ii = 1; ii <= i__1; ++ii) { + i__2 = ii + k * a_dim1; + i__3 = ii + k * a_dim1; + q__1.r = a[i__3].r / d11, q__1.i = a[i__3].i / + d11; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L16: */ + } + +/* Perform a rank-1 update of A(k+1:n,k+1:n) as */ +/* A := A - U(k)*D(k)*U(k)**T */ +/* = A - W(k)*(1/D(k))*W(k)**T */ +/* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T */ + + i__1 = k - 1; + r__1 = -d11; + cher_(uplo, &i__1, &r__1, &a[k * a_dim1 + 1], &c__1, & + a[a_offset], lda); + } + } + + } else { + +/* 2-by-2 pivot block D(k): columns k and k-1 now hold */ + +/* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) */ + +/* where U(k) and U(k-1) are the k-th and (k-1)-th columns */ +/* of U */ + +/* Perform a rank-2 update of A(1:k-2,1:k-2) as */ + +/* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T */ +/* = A - ( ( A(k-1)A(k) )*inv(D(k)) ) * ( A(k-1)A(k) )**T */ + +/* and store L(k) and L(k+1) in columns k and k+1 */ + + if (k > 2) { +/* D = |A12| */ + i__1 = k - 1 + k * a_dim1; + r__1 = a[i__1].r; + r__2 = r_imag(&a[k - 1 + k * a_dim1]); + d__ = slapy2_(&r__1, &r__2); + i__1 = k + k * a_dim1; + q__1.r = a[i__1].r / d__, q__1.i = a[i__1].i / d__; + d11 = q__1.r; + i__1 = k - 1 + (k - 1) * a_dim1; + q__1.r = a[i__1].r / d__, q__1.i = a[i__1].i / d__; + d22 = q__1.r; + i__1 = k - 1 + k * a_dim1; + q__1.r = a[i__1].r / d__, q__1.i = a[i__1].i / d__; + d12.r = q__1.r, d12.i = q__1.i; + tt = 1.f / (d11 * d22 - 1.f); + + for (j = k - 2; j >= 1; --j) { + +/* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J */ + + i__1 = j + (k - 1) * a_dim1; + q__3.r = d11 * a[i__1].r, q__3.i = d11 * a[i__1].i; + r_cnjg(&q__5, &d12); + i__2 = j + k * a_dim1; + q__4.r = q__5.r * a[i__2].r - q__5.i * a[i__2].i, + q__4.i = q__5.r * a[i__2].i + q__5.i * a[i__2] + .r; + q__2.r = q__3.r - q__4.r, q__2.i = q__3.i - q__4.i; + q__1.r = tt * q__2.r, q__1.i = tt * q__2.i; + wkm1.r = q__1.r, wkm1.i = q__1.i; + i__1 = j + k * a_dim1; + q__3.r = d22 * a[i__1].r, q__3.i = d22 * a[i__1].i; + i__2 = j + (k - 1) * a_dim1; + q__4.r = d12.r * a[i__2].r - d12.i * a[i__2].i, + q__4.i = d12.r * a[i__2].i + d12.i * a[i__2] + .r; + q__2.r = q__3.r - q__4.r, q__2.i = q__3.i - q__4.i; + q__1.r = tt * q__2.r, q__1.i = tt * q__2.i; + wk.r = q__1.r, wk.i = q__1.i; + +/* Perform a rank-2 update of A(1:k-2,1:k-2) */ + + for (i__ = j; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__ + j * a_dim1; + i__3 = i__ + k * a_dim1; + q__4.r = a[i__3].r / d__, q__4.i = a[i__3].i / + d__; + r_cnjg(&q__5, &wk); + q__3.r = q__4.r * q__5.r - q__4.i * q__5.i, + q__3.i = q__4.r * q__5.i + q__4.i * + q__5.r; + q__2.r = a[i__2].r - q__3.r, q__2.i = a[i__2].i - + q__3.i; + i__4 = i__ + (k - 1) * a_dim1; + q__7.r = a[i__4].r / d__, q__7.i = a[i__4].i / + d__; + r_cnjg(&q__8, &wkm1); + q__6.r = q__7.r * q__8.r - q__7.i * q__8.i, + q__6.i = q__7.r * q__8.i + q__7.i * + q__8.r; + q__1.r = q__2.r - q__6.r, q__1.i = q__2.i - + q__6.i; + a[i__1].r = q__1.r, a[i__1].i = q__1.i; +/* L20: */ + } + +/* Store U(k) and U(k-1) in cols k and k-1 for row J */ + + i__1 = j + k * a_dim1; + q__1.r = wk.r / d__, q__1.i = wk.i / d__; + a[i__1].r = q__1.r, a[i__1].i = q__1.i; + i__1 = j + (k - 1) * a_dim1; + q__1.r = wkm1.r / d__, q__1.i = wkm1.i / d__; + a[i__1].r = q__1.r, a[i__1].i = q__1.i; +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = j + j * a_dim1; + i__2 = j + j * a_dim1; + r__1 = a[i__2].r; + q__1.r = r__1, q__1.i = 0.f; + a[i__1].r = q__1.r, a[i__1].i = q__1.i; + +/* L30: */ + } + + } + + } + + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -p; + ipiv[k - 1] = -kp; + } + +/* Decrease K and return to the start of the main loop */ + + k -= kstep; + goto L10; + + } else { + +/* Factorize A as L*D*L**H using the lower triangle of A */ + +/* K is the main loop index, increasing from 1 to N in steps of */ +/* 1 or 2 */ + + k = 1; +L40: + +/* If K > N, exit from loop */ + + if (k > *n) { + goto L70; + } + kstep = 1; + p = k; + +/* Determine rows and columns to be interchanged and whether */ +/* a 1-by-1 or 2-by-2 pivot block will be used */ + + i__1 = k + k * a_dim1; + absakk = (r__1 = a[i__1].r, abs(r__1)); + +/* IMAX is the row-index of the largest off-diagonal element in */ +/* column K, and COLMAX is its absolute value. */ +/* Determine both COLMAX and IMAX. */ + + if (k < *n) { + i__1 = *n - k; + imax = k + icamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); + i__1 = imax + k * a_dim1; + colmax = (r__1 = a[i__1].r, abs(r__1)) + (r__2 = r_imag(&a[imax + + k * a_dim1]), abs(r__2)); + } else { + colmax = 0.f; + } + + if (f2cmax(absakk,colmax) == 0.f) { + +/* Column K is zero or underflow: set INFO and continue */ + + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + } else { + +/* ============================================================ */ + +/* BEGIN pivot search */ + +/* Case(1) */ +/* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX */ +/* (used to handle NaN and Inf) */ + + if (! (absakk < alpha * colmax)) { + +/* no interchange, use 1-by-1 pivot block */ + + kp = k; + + } else { + + done = FALSE_; + +/* Loop until pivot found */ + +L42: + +/* BEGIN pivot search loop body */ + + +/* JMAX is the column-index of the largest off-diagonal */ +/* element in row IMAX, and ROWMAX is its absolute value. */ +/* Determine both ROWMAX and JMAX. */ + + if (imax != k) { + i__1 = imax - k; + jmax = k - 1 + icamax_(&i__1, &a[imax + k * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (r__1 = a[i__1].r, abs(r__1)) + (r__2 = r_imag(& + a[imax + jmax * a_dim1]), abs(r__2)); + } else { + rowmax = 0.f; + } + + if (imax < *n) { + i__1 = *n - imax; + itemp = imax + icamax_(&i__1, &a[imax + 1 + imax * a_dim1] + , &c__1); + i__1 = itemp + imax * a_dim1; + stemp = (r__1 = a[i__1].r, abs(r__1)) + (r__2 = r_imag(&a[ + itemp + imax * a_dim1]), abs(r__2)); + if (stemp > rowmax) { + rowmax = stemp; + jmax = itemp; + } + } + +/* Case(2) */ +/* Equivalent to testing for */ +/* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX */ +/* (used to handle NaN and Inf) */ + + i__1 = imax + imax * a_dim1; + if (! ((r__1 = a[i__1].r, abs(r__1)) < alpha * rowmax)) { + +/* interchange rows and columns K and IMAX, */ +/* use 1-by-1 pivot block */ + + kp = imax; + done = TRUE_; + +/* Case(3) */ +/* Equivalent to testing for ROWMAX.EQ.COLMAX, */ +/* (used to handle NaN and Inf) */ + + } else if (p == jmax || rowmax <= colmax) { + +/* interchange rows and columns K+1 and IMAX, */ +/* use 2-by-2 pivot block */ + + kp = imax; + kstep = 2; + done = TRUE_; + +/* Case(4) */ + } else { + +/* Pivot not found: set params and repeat */ + + p = imax; + colmax = rowmax; + imax = jmax; + } + + +/* END pivot search loop body */ + + if (! done) { + goto L42; + } + + } + +/* END pivot search */ + +/* ============================================================ */ + +/* KK is the column of A where pivoting step stopped */ + + kk = k + kstep - 1; + +/* For only a 2x2 pivot, interchange rows and columns K and P */ +/* in the trailing submatrix A(k:n,k:n) */ + + if (kstep == 2 && p != k) { +/* (1) Swap columnar parts */ + if (p < *n) { + i__1 = *n - p; + cswap_(&i__1, &a[p + 1 + k * a_dim1], &c__1, &a[p + 1 + p + * a_dim1], &c__1); + } +/* (2) Swap and conjugate middle parts */ + i__1 = p - 1; + for (j = k + 1; j <= i__1; ++j) { + r_cnjg(&q__1, &a[j + k * a_dim1]); + t.r = q__1.r, t.i = q__1.i; + i__2 = j + k * a_dim1; + r_cnjg(&q__1, &a[p + j * a_dim1]); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = p + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L44: */ + } +/* (3) Swap and conjugate corner elements at row-col interserction */ + i__1 = p + k * a_dim1; + r_cnjg(&q__1, &a[p + k * a_dim1]); + a[i__1].r = q__1.r, a[i__1].i = q__1.i; +/* (4) Swap diagonal elements at row-col intersection */ + i__1 = k + k * a_dim1; + r1 = a[i__1].r; + i__1 = k + k * a_dim1; + i__2 = p + p * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + i__1 = p + p * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.f; + } + +/* For both 1x1 and 2x2 pivots, interchange rows and */ +/* columns KK and KP in the trailing submatrix A(k:n,k:n) */ + + if (kp != kk) { +/* (1) Swap columnar parts */ + if (kp < *n) { + i__1 = *n - kp; + cswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + + kp * a_dim1], &c__1); + } +/* (2) Swap and conjugate middle parts */ + i__1 = kp - 1; + for (j = kk + 1; j <= i__1; ++j) { + r_cnjg(&q__1, &a[j + kk * a_dim1]); + t.r = q__1.r, t.i = q__1.i; + i__2 = j + kk * a_dim1; + r_cnjg(&q__1, &a[kp + j * a_dim1]); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; +/* L45: */ + } +/* (3) Swap and conjugate corner elements at row-col interserction */ + i__1 = kp + kk * a_dim1; + r_cnjg(&q__1, &a[kp + kk * a_dim1]); + a[i__1].r = q__1.r, a[i__1].i = q__1.i; +/* (4) Swap diagonal elements at row-col intersection */ + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.f; + + if (kstep == 2) { +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; +/* (5) Swap row elements */ + i__1 = k + 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k + 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } else { +/* (*) Make sure that diagonal element of pivot is real */ + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + if (kstep == 2) { + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + } + } + +/* Update the trailing submatrix */ + + if (kstep == 1) { + +/* 1-by-1 pivot block D(k): column k of A now holds */ + +/* W(k) = L(k)*D(k), */ + +/* where L(k) is the k-th column of L */ + + if (k < *n) { + +/* Perform a rank-1 update of A(k+1:n,k+1:n) and */ +/* store L(k) in column k */ + +/* Handle division by a small number */ + + i__1 = k + k * a_dim1; + if ((r__1 = a[i__1].r, abs(r__1)) >= sfmin) { + +/* Perform a rank-1 update of A(k+1:n,k+1:n) as */ +/* A := A - L(k)*D(k)*L(k)**T */ +/* = A - W(k)*(1/D(k))*W(k)**T */ + + i__1 = k + k * a_dim1; + d11 = 1.f / a[i__1].r; + i__1 = *n - k; + r__1 = -d11; + cher_(uplo, &i__1, &r__1, &a[k + 1 + k * a_dim1], & + c__1, &a[k + 1 + (k + 1) * a_dim1], lda); + +/* Store L(k) in column k */ + + i__1 = *n - k; + csscal_(&i__1, &d11, &a[k + 1 + k * a_dim1], &c__1); + } else { + +/* Store L(k) in column k */ + + i__1 = k + k * a_dim1; + d11 = a[i__1].r; + i__1 = *n; + for (ii = k + 1; ii <= i__1; ++ii) { + i__2 = ii + k * a_dim1; + i__3 = ii + k * a_dim1; + q__1.r = a[i__3].r / d11, q__1.i = a[i__3].i / + d11; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L46: */ + } + +/* Perform a rank-1 update of A(k+1:n,k+1:n) as */ +/* A := A - L(k)*D(k)*L(k)**T */ +/* = A - W(k)*(1/D(k))*W(k)**T */ +/* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T */ + + i__1 = *n - k; + r__1 = -d11; + cher_(uplo, &i__1, &r__1, &a[k + 1 + k * a_dim1], & + c__1, &a[k + 1 + (k + 1) * a_dim1], lda); + } + } + + } else { + +/* 2-by-2 pivot block D(k): columns k and k+1 now hold */ + +/* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) */ + +/* where L(k) and L(k+1) are the k-th and (k+1)-th columns */ +/* of L */ + + +/* Perform a rank-2 update of A(k+2:n,k+2:n) as */ + +/* A := A - ( L(k) L(k+1) ) * D(k) * ( L(k) L(k+1) )**T */ +/* = A - ( ( A(k)A(k+1) )*inv(D(k) ) * ( A(k)A(k+1) )**T */ + +/* and store L(k) and L(k+1) in columns k and k+1 */ + + if (k < *n - 1) { +/* D = |A21| */ + i__1 = k + 1 + k * a_dim1; + r__1 = a[i__1].r; + r__2 = r_imag(&a[k + 1 + k * a_dim1]); + d__ = slapy2_(&r__1, &r__2); + i__1 = k + 1 + (k + 1) * a_dim1; + d11 = a[i__1].r / d__; + i__1 = k + k * a_dim1; + d22 = a[i__1].r / d__; + i__1 = k + 1 + k * a_dim1; + q__1.r = a[i__1].r / d__, q__1.i = a[i__1].i / d__; + d21.r = q__1.r, d21.i = q__1.i; + tt = 1.f / (d11 * d22 - 1.f); + + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + +/* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J */ + + i__2 = j + k * a_dim1; + q__3.r = d11 * a[i__2].r, q__3.i = d11 * a[i__2].i; + i__3 = j + (k + 1) * a_dim1; + q__4.r = d21.r * a[i__3].r - d21.i * a[i__3].i, + q__4.i = d21.r * a[i__3].i + d21.i * a[i__3] + .r; + q__2.r = q__3.r - q__4.r, q__2.i = q__3.i - q__4.i; + q__1.r = tt * q__2.r, q__1.i = tt * q__2.i; + wk.r = q__1.r, wk.i = q__1.i; + i__2 = j + (k + 1) * a_dim1; + q__3.r = d22 * a[i__2].r, q__3.i = d22 * a[i__2].i; + r_cnjg(&q__5, &d21); + i__3 = j + k * a_dim1; + q__4.r = q__5.r * a[i__3].r - q__5.i * a[i__3].i, + q__4.i = q__5.r * a[i__3].i + q__5.i * a[i__3] + .r; + q__2.r = q__3.r - q__4.r, q__2.i = q__3.i - q__4.i; + q__1.r = tt * q__2.r, q__1.i = tt * q__2.i; + wkp1.r = q__1.r, wkp1.i = q__1.i; + +/* Perform a rank-2 update of A(k+2:n,k+2:n) */ + + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__ + k * a_dim1; + q__4.r = a[i__5].r / d__, q__4.i = a[i__5].i / + d__; + r_cnjg(&q__5, &wk); + q__3.r = q__4.r * q__5.r - q__4.i * q__5.i, + q__3.i = q__4.r * q__5.i + q__4.i * + q__5.r; + q__2.r = a[i__4].r - q__3.r, q__2.i = a[i__4].i - + q__3.i; + i__6 = i__ + (k + 1) * a_dim1; + q__7.r = a[i__6].r / d__, q__7.i = a[i__6].i / + d__; + r_cnjg(&q__8, &wkp1); + q__6.r = q__7.r * q__8.r - q__7.i * q__8.i, + q__6.i = q__7.r * q__8.i + q__7.i * + q__8.r; + q__1.r = q__2.r - q__6.r, q__1.i = q__2.i - + q__6.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L50: */ + } + +/* Store L(k) and L(k+1) in cols k and k+1 for row J */ + + i__2 = j + k * a_dim1; + q__1.r = wk.r / d__, q__1.i = wk.i / d__; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = j + (k + 1) * a_dim1; + q__1.r = wkp1.r / d__, q__1.i = wkp1.i / d__; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* (*) Make sure that diagonal element of pivot is real */ + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + r__1 = a[i__3].r; + q__1.r = r__1, q__1.i = 0.f; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + +/* L60: */ + } + + } + + } + + } + +/* Store details of the interchanges in IPIV */ + + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -p; + ipiv[k + 1] = -kp; + } + +/* Increase K and return to the start of the main loop */ + + k += kstep; + goto L40; + + } + +L70: + + return 0; + +/* End of CHETF2_ROOK */ + +} /* chetf2_rook__ */ + diff --git a/lapack-netlib/SRC/chetrd.c b/lapack-netlib/SRC/chetrd.c new file mode 100644 index 000000000..c45dab47d --- /dev/null +++ b/lapack-netlib/SRC/chetrd.c @@ -0,0 +1,814 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHETRD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHETRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LWORK, N */ +/* REAL D( * ), E( * ) */ +/* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHETRD reduces a complex Hermitian matrix A to real symmetric */ +/* > tridiagonal form T by a unitary similarity transformation: */ +/* > Q**H * A * Q = T. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > On exit, if UPLO = 'U', the diagonal and first superdiagonal */ +/* > of A are overwritten by the corresponding elements of the */ +/* > tridiagonal matrix T, and the elements above the first */ +/* > superdiagonal, with the array TAU, represent the unitary */ +/* > matrix Q as a product of elementary reflectors; if UPLO */ +/* > = 'L', the diagonal and first subdiagonal of A are over- */ +/* > written by the corresponding elements of the tridiagonal */ +/* > matrix T, and the elements below the first subdiagonal, with */ +/* > the array TAU, represent the unitary matrix Q as a product */ +/* > of elementary reflectors. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (N) */ +/* > The diagonal elements of the tridiagonal matrix T: */ +/* > D(i) = A(i,i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is REAL array, dimension (N-1) */ +/* > The off-diagonal elements of the tridiagonal matrix T: */ +/* > E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX array, dimension (N-1) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= 1. */ +/* > For optimum performance LWORK >= N*NB, where NB is the */ +/* > optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complexHEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > If UPLO = 'U', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(n-1) . . . H(2) H(1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */ +/* > A(1:i-1,i+1), and tau in TAU(i). */ +/* > */ +/* > If UPLO = 'L', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(n-1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */ +/* > and tau in TAU(i). */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples */ +/* > with n = 5: */ +/* > */ +/* > if UPLO = 'U': if UPLO = 'L': */ +/* > */ +/* > ( d e v2 v3 v4 ) ( d ) */ +/* > ( d e v3 v4 ) ( e d ) */ +/* > ( d e v4 ) ( v1 e d ) */ +/* > ( d e ) ( v1 v2 e d ) */ +/* > ( d ) ( v1 v2 v3 e d ) */ +/* > */ +/* > where d and e denote diagonal and off-diagonal elements of T, and vi */ +/* > denotes an element of the vector defining H(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int chetrd_(char *uplo, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tau, complex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1; + + /* Local variables */ + integer i__, j; + extern logical lsame_(char *, char *); + integer nbmin, iinfo; + logical upper; + extern /* Subroutine */ int chetd2_(char *, integer *, complex *, integer + *, real *, real *, complex *, integer *), cher2k_(char *, + char *, integer *, integer *, complex *, complex *, integer *, + complex *, integer *, real *, complex *, integer *); + integer nb, kk, nx; + extern /* Subroutine */ int clatrd_(char *, integer *, integer *, complex + *, integer *, real *, complex *, complex *, integer *), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + integer iws; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --d__; + --e; + --tau; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*lwork < 1 && ! lquery) { + *info = -9; + } + + if (*info == 0) { + +/* Determine the block size. */ + + nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *n * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHETRD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nx = *n; + iws = 1; + if (nb > 1 && nb < *n) { + +/* Determine when to cross over from blocked to unblocked code */ +/* (last block is always handled by unblocked code). */ + +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__3, "CHETRD", uplo, n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < *n) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: determine the */ +/* minimum value of NB, and reduce NB or force use of */ +/* unblocked code by setting NX = N. */ + +/* Computing MAX */ + i__1 = *lwork / ldwork; + nb = f2cmax(i__1,1); + nbmin = ilaenv_(&c__2, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + if (nb < nbmin) { + nx = *n; + } + } + } else { + nx = *n; + } + } else { + nb = 1; + } + + if (upper) { + +/* Reduce the upper triangle of A. */ +/* Columns 1:kk are handled by the unblocked method. */ + + kk = *n - (*n - nx + nb - 1) / nb * nb; + i__1 = kk + 1; + i__2 = -nb; + for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { + +/* Reduce columns i:i+nb-1 to tridiagonal form and form the */ +/* matrix W which is needed to update the unreduced part of */ +/* the matrix */ + + i__3 = i__ + nb - 1; + clatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + work[1], &ldwork); + +/* Update the unreduced submatrix A(1:i-1,1:i-1), using an */ +/* update of the form: A := A - V*W**H - W*V**H */ + + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = 0.f; + cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ * a_dim1 + + 1], lda, &work[1], &ldwork, &c_b23, &a[a_offset], lda); + +/* Copy superdiagonal elements back into A, and diagonal */ +/* elements into D */ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j - 1 + j * a_dim1; + i__5 = j - 1; + a[i__4].r = e[i__5], a[i__4].i = 0.f; + i__4 = j; + i__5 = j + j * a_dim1; + d__[i__4] = a[i__5].r; +/* L10: */ + } +/* L20: */ + } + +/* Use unblocked code to reduce the last or only block */ + + chetd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); + } else { + +/* Reduce the lower triangle of A */ + + i__2 = *n - nx; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + +/* Reduce columns i:i+nb-1 to tridiagonal form and form the */ +/* matrix W which is needed to update the unreduced part of */ +/* the matrix */ + + i__3 = *n - i__ + 1; + clatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + tau[i__], &work[1], &ldwork); + +/* Update the unreduced submatrix A(i+nb:n,i+nb:n), using */ +/* an update of the form: A := A - V*W**H - W*V**H */ + + i__3 = *n - i__ - nb + 1; + q__1.r = -1.f, q__1.i = 0.f; + cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b23, &a[ + i__ + nb + (i__ + nb) * a_dim1], lda); + +/* Copy subdiagonal elements back into A, and diagonal */ +/* elements into D */ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + 1 + j * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.f; + i__4 = j; + i__5 = j + j * a_dim1; + d__[i__4] = a[i__5].r; +/* L30: */ + } +/* L40: */ + } + +/* Use unblocked code to reduce the last or only block */ + + i__1 = *n - i__ + 1; + chetd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + &tau[i__], &iinfo); + } + + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CHETRD */ + +} /* chetrd_ */ + diff --git a/lapack-netlib/SRC/chetrd_2stage.c b/lapack-netlib/SRC/chetrd_2stage.c new file mode 100644 index 000000000..ff3f1bb58 --- /dev/null +++ b/lapack-netlib/SRC/chetrd_2stage.c @@ -0,0 +1,744 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHETRD_2STAGE */ + +/* @generated from zhetrd_2stage.f, fortran z -> c, Sun Nov 6 19:34:06 2016 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHETRD_2STAGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHETRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, */ +/* HOUS2, LHOUS2, WORK, LWORK, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER VECT, UPLO */ +/* INTEGER N, LDA, LWORK, LHOUS2, INFO */ +/* REAL D( * ), E( * ) */ +/* COMPLEX A( LDA, * ), TAU( * ), */ +/* HOUS2( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHETRD_2STAGE reduces a complex Hermitian matrix A to real symmetric */ +/* > tridiagonal form T by a unitary similarity transformation: */ +/* > Q1**H Q2**H* A * Q2 * Q1 = T. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] VECT */ +/* > \verbatim */ +/* > VECT is CHARACTER*1 */ +/* > = 'N': No need for the Housholder representation, */ +/* > in particular for the second stage (Band to */ +/* > tridiagonal) and thus LHOUS2 is of size f2cmax(1, 4*N); */ +/* > = 'V': the Householder representation is needed to */ +/* > either generate Q1 Q2 or to apply Q1 Q2, */ +/* > then LHOUS2 is to be queried and computed. */ +/* > (NOT AVAILABLE IN THIS RELEASE). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > On exit, if UPLO = 'U', the band superdiagonal */ +/* > of A are overwritten by the corresponding elements of the */ +/* > internal band-diagonal matrix AB, and the elements above */ +/* > the KD superdiagonal, with the array TAU, represent the unitary */ +/* > matrix Q1 as a product of elementary reflectors; if UPLO */ +/* > = 'L', the diagonal and band subdiagonal of A are over- */ +/* > written by the corresponding elements of the internal band-diagonal */ +/* > matrix AB, and the elements below the KD subdiagonal, with */ +/* > the array TAU, represent the unitary matrix Q1 as a product */ +/* > of elementary reflectors. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (N) */ +/* > The diagonal elements of the tridiagonal matrix T. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is REAL array, dimension (N-1) */ +/* > The off-diagonal elements of the tridiagonal matrix T. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX array, dimension (N-KD) */ +/* > The scalar factors of the elementary reflectors of */ +/* > the first stage (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] HOUS2 */ +/* > \verbatim */ +/* > HOUS2 is COMPLEX array, dimension (LHOUS2) */ +/* > Stores the Householder representation of the stage2 */ +/* > band to tridiagonal. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LHOUS2 */ +/* > \verbatim */ +/* > LHOUS2 is INTEGER */ +/* > The dimension of the array HOUS2. */ +/* > If LWORK = -1, or LHOUS2=-1, */ +/* > then a query is assumed; the routine */ +/* > only calculates the optimal size of the HOUS2 array, returns */ +/* > this value as the first entry of the HOUS2 array, and no error */ +/* > message related to LHOUS2 is issued by XERBLA. */ +/* > If VECT='N', LHOUS2 = f2cmax(1, 4*n); */ +/* > if VECT='V', option not yet available. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK = MAX(1, dimension) */ +/* > If LWORK = -1, or LHOUS2 = -1, */ +/* > then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > LWORK = MAX(1, dimension) where */ +/* > dimension = f2cmax(stage1,stage2) + (KD+1)*N */ +/* > = N*KD + N*f2cmax(KD+1,FACTOPTNB) */ +/* > + f2cmax(2*KD*KD, KD*NTHREADS) */ +/* > + (KD+1)*N */ +/* > where KD is the blocking size of the reduction, */ +/* > FACTOPTNB is the blocking used by the QR or LQ */ +/* > algorithm, usually FACTOPTNB=128 is a good choice */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complexHEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Implemented by Azzam Haidar. */ +/* > */ +/* > All details are available on technical report, SC11, SC13 papers. */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int chetrd_2stage_(char *vect, char *uplo, integer *n, + complex *a, integer *lda, real *d__, real *e, complex *tau, complex * + hous2, integer *lhous2, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + + /* Local variables */ + integer ldab; + extern /* Subroutine */ int chetrd_hb2st_(char *, char *, char *, + integer *, integer *, complex *, integer *, real *, real *, + complex *, integer *, complex *, integer *, integer *); + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + integer lwrk, wpos; + extern logical lsame_(char *, char *); + integer abpos, lhmin, lwmin; + logical wantq, upper; + integer ib, kd; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical lquery; + extern /* Subroutine */ int chetrd_he2hb_(char *, integer *, integer *, + complex *, integer *, complex *, integer *, complex *, complex *, + integer *, integer *); + + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --d__; + --e; + --tau; + --hous2; + --work; + + /* Function Body */ + *info = 0; + wantq = lsame_(vect, "V"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1 || *lhous2 == -1; + +/* Determine the block size, the workspace size and the hous size. */ + + kd = ilaenv2stage_(&c__1, "CHETRD_2STAGE", vect, n, &c_n1, &c_n1, &c_n1); + ib = ilaenv2stage_(&c__2, "CHETRD_2STAGE", vect, n, &kd, &c_n1, &c_n1); + lhmin = ilaenv2stage_(&c__3, "CHETRD_2STAGE", vect, n, &kd, &ib, &c_n1); + lwmin = ilaenv2stage_(&c__4, "CHETRD_2STAGE", vect, n, &kd, &ib, &c_n1); +/* WRITE(*,*),'CHETRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO, */ +/* $ LHMIN, LWMIN */ + + if (! lsame_(vect, "N")) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*lhous2 < lhmin && ! lquery) { + *info = -10; + } else if (*lwork < lwmin && ! lquery) { + *info = -12; + } + + if (*info == 0) { + hous2[1].r = (real) lhmin, hous2[1].i = 0.f; + work[1].r = (real) lwmin, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHETRD_2STAGE", &i__1, (ftnlen)13); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + +/* Determine pointer position */ + + ldab = kd + 1; + lwrk = *lwork - ldab * *n; + abpos = 1; + wpos = abpos + ldab * *n; + chetrd_he2hb_(uplo, n, &kd, &a[a_offset], lda, &work[abpos], &ldab, &tau[ + 1], &work[wpos], &lwrk, info); + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHETRD_HE2HB", &i__1, (ftnlen)12); + return 0; + } + chetrd_hb2st_("Y", vect, uplo, n, &kd, &work[abpos], &ldab, &d__[1], &e[ + 1], &hous2[1], lhous2, &work[wpos], &lwrk, info); + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHETRD_HB2ST", &i__1, (ftnlen)12); + return 0; + } + + + hous2[1].r = (real) lhmin, hous2[1].i = 0.f; + work[1].r = (real) lwmin, work[1].i = 0.f; + return 0; + +/* End of CHETRD_2STAGE */ + +} /* chetrd_2stage__ */ + diff --git a/lapack-netlib/SRC/chetrd_hb2st.c b/lapack-netlib/SRC/chetrd_hb2st.c new file mode 100644 index 000000000..daa316f0c --- /dev/null +++ b/lapack-netlib/SRC/chetrd_hb2st.c @@ -0,0 +1,998 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) ceil(w) +#define myhuge_(w) HUGE_VAL +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHBTRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric tridiagonal form T */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHBTRD_HB2ST + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, */ +/* D, E, HOUS, LHOUS, WORK, LWORK, INFO ) */ + +/* #if defined(_OPENMP) */ +/* use omp_lib */ +/* #endif */ + +/* IMPLICIT NONE */ + +/* CHARACTER STAGE1, UPLO, VECT */ +/* INTEGER N, KD, IB, LDAB, LHOUS, LWORK, INFO */ +/* REAL D( * ), E( * ) */ +/* COMPLEX AB( LDAB, * ), HOUS( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric */ +/* > tridiagonal form T by a unitary similarity transformation: */ +/* > Q**H * A * Q = T. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] STAGE1 */ +/* > \verbatim */ +/* > STAGE1 is CHARACTER*1 */ +/* > = 'N': "No": to mention that the stage 1 of the reduction */ +/* > from dense to band using the chetrd_he2hb routine */ +/* > was not called before this routine to reproduce AB. */ +/* > In other term this routine is called as standalone. */ +/* > = 'Y': "Yes": to mention that the stage 1 of the */ +/* > reduction from dense to band using the chetrd_he2hb */ +/* > routine has been called to produce AB (e.g., AB is */ +/* > the output of chetrd_he2hb. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VECT */ +/* > \verbatim */ +/* > VECT is CHARACTER*1 */ +/* > = 'N': No need for the Housholder representation, */ +/* > and thus LHOUS is of size f2cmax(1, 4*N); */ +/* > = 'V': the Householder representation is needed to */ +/* > either generate or to apply Q later on, */ +/* > then LHOUS is to be queried and computed. */ +/* > (NOT AVAILABLE IN THIS RELEASE). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the matrix A if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX array, dimension (LDAB,N) */ +/* > On entry, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > On exit, the diagonal elements of AB are overwritten by the */ +/* > diagonal elements of the tridiagonal matrix T; if KD > 0, the */ +/* > elements on the first superdiagonal (if UPLO = 'U') or the */ +/* > first subdiagonal (if UPLO = 'L') are overwritten by the */ +/* > off-diagonal elements of T; the rest of AB is overwritten by */ +/* > values generated during the reduction. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (N) */ +/* > The diagonal elements of the tridiagonal matrix T. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is REAL array, dimension (N-1) */ +/* > The off-diagonal elements of the tridiagonal matrix T: */ +/* > E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] HOUS */ +/* > \verbatim */ +/* > HOUS is COMPLEX array, dimension LHOUS, that */ +/* > store the Householder representation. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LHOUS */ +/* > \verbatim */ +/* > LHOUS is INTEGER */ +/* > The dimension of the array HOUS. LHOUS = MAX(1, dimension) */ +/* > If LWORK = -1, or LHOUS=-1, */ +/* > then a query is assumed; the routine */ +/* > only calculates the optimal size of the HOUS array, returns */ +/* > this value as the first entry of the HOUS array, and no error */ +/* > message related to LHOUS is issued by XERBLA. */ +/* > LHOUS = MAX(1, dimension) where */ +/* > dimension = 4*N if VECT='N' */ +/* > not available now if VECT='H' */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK = MAX(1, dimension) */ +/* > If LWORK = -1, or LHOUS=-1, */ +/* > then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > LWORK = MAX(1, dimension) where */ +/* > dimension = (2KD+1)*N + KD*NTHREADS */ +/* > where KD is the blocking size of the reduction, */ +/* > FACTOPTNB is the blocking used by the QR or LQ */ +/* > algorithm, usually FACTOPTNB=128 is a good choice */ +/* > NTHREADS is the number of threads used when */ +/* > openMP compilation is enabled, otherwise =1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complexOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Implemented by Azzam Haidar. */ +/* > */ +/* > All details are available on technical report, SC11, SC13 papers. */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int chetrd_hb2st_(char *stage1, char *vect, char *uplo, + integer *n, integer *kd, complex *ab, integer *ldab, real *d__, real * + e, complex *hous, integer *lhous, complex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5; + real r__1; + complex q__1; + + /* Local variables */ + integer inda; + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + integer thed, indv, myid, indw, apos, dpos, abofdpos, nthreads, i__, k, m, + edind, debug; + extern logical lsame_(char *, char *); + integer lhmin, sicev, sizea, shift, stind, colpt, lwmin, awpos; + logical wantq, upper; + integer grsiz, ttype, stepercol, ed, ib; + extern /* Subroutine */ int chb2st_kernels_(char *, logical *, integer *, + integer *, integer *, integer *, integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, complex *); + integer st, abdpos; + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *), claset_(char *, + integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *, ftnlen); + integer thgrid, thgrnb, indtau; + real abstmp; + integer ofdpos, blklastind; + extern /* Subroutine */ int mecago_(); + logical lquery, afters1; + integer lda, tid, ldv; + complex tmp; + integer stt, sweepid, nbtiles, sizetau, thgrsiz; + + + + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Determine the minimal workspace size required. */ +/* Test the input parameters */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --d__; + --e; + --hous; + --work; + + /* Function Body */ + debug = 0; + *info = 0; + afters1 = lsame_(stage1, "Y"); + wantq = lsame_(vect, "V"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1 || *lhous == -1; + +/* Determine the block size, the workspace size and the hous size. */ + + ib = ilaenv2stage_(&c__2, "CHETRD_HB2ST", vect, n, kd, &c_n1, &c_n1); + lhmin = ilaenv2stage_(&c__3, "CHETRD_HB2ST", vect, n, kd, &ib, &c_n1); + lwmin = ilaenv2stage_(&c__4, "CHETRD_HB2ST", vect, n, kd, &ib, &c_n1); + + if (! afters1 && ! lsame_(stage1, "N")) { + *info = -1; + } else if (! lsame_(vect, "N")) { + *info = -2; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*kd < 0) { + *info = -5; + } else if (*ldab < *kd + 1) { + *info = -7; + } else if (*lhous < lhmin && ! lquery) { + *info = -11; + } else if (*lwork < lwmin && ! lquery) { + *info = -13; + } + + if (*info == 0) { + hous[1].r = (real) lhmin, hous[1].i = 0.f; + work[1].r = (real) lwmin, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHETRD_HB2ST", &i__1, (ftnlen)12); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + hous[1].r = 1.f, hous[1].i = 0.f; + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + +/* Determine pointer position */ + + ldv = *kd + ib; + sizetau = *n << 1; + sicev = *n << 1; + indtau = 1; + indv = indtau + sizetau; + lda = (*kd << 1) + 1; + sizea = lda * *n; + inda = 1; + indw = inda + sizea; + nthreads = 1; + tid = 0; + + if (upper) { + apos = inda + *kd; + awpos = inda; + dpos = apos + *kd; + ofdpos = dpos - 1; + abdpos = *kd + 1; + abofdpos = *kd; + } else { + apos = inda; + awpos = inda + *kd + 1; + dpos = apos; + ofdpos = dpos + 1; + abdpos = 1; + abofdpos = 2; + } + +/* Case KD=0: */ +/* The matrix is diagonal. We just copy it (convert to "real" for */ +/* complex because D is double and the imaginary part should be 0) */ +/* and store it in D. A sequential code here is better or */ +/* in a parallel environment it might need two cores for D and E */ + + if (*kd == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = abdpos + i__ * ab_dim1; + d__[i__] = ab[i__2].r; +/* L30: */ + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + e[i__] = 0.f; +/* L40: */ + } + + hous[1].r = 1.f, hous[1].i = 0.f; + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + +/* Case KD=1: */ +/* The matrix is already Tridiagonal. We have to make diagonal */ +/* and offdiagonal elements real, and store them in D and E. */ +/* For that, for real precision just copy the diag and offdiag */ +/* to D and E while for the COMPLEX case the bulge chasing is */ +/* performed to convert the hermetian tridiagonal to symmetric */ +/* tridiagonal. A simpler coversion formula might be used, but then */ +/* updating the Q matrix will be required and based if Q is generated */ +/* or not this might complicate the story. */ + + if (*kd == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = abdpos + i__ * ab_dim1; + d__[i__] = ab[i__2].r; +/* L50: */ + } + +/* make off-diagonal elements real and copy them to E */ + + if (upper) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = abofdpos + (i__ + 1) * ab_dim1; + tmp.r = ab[i__2].r, tmp.i = ab[i__2].i; + abstmp = c_abs(&tmp); + i__2 = abofdpos + (i__ + 1) * ab_dim1; + ab[i__2].r = abstmp, ab[i__2].i = 0.f; + e[i__] = abstmp; + if (abstmp != 0.f) { + q__1.r = tmp.r / abstmp, q__1.i = tmp.i / abstmp; + tmp.r = q__1.r, tmp.i = q__1.i; + } else { + tmp.r = 1.f, tmp.i = 0.f; + } + if (i__ < *n - 1) { + i__2 = abofdpos + (i__ + 2) * ab_dim1; + i__3 = abofdpos + (i__ + 2) * ab_dim1; + q__1.r = ab[i__3].r * tmp.r - ab[i__3].i * tmp.i, q__1.i = + ab[i__3].r * tmp.i + ab[i__3].i * tmp.r; + ab[i__2].r = q__1.r, ab[i__2].i = q__1.i; + } +/* IF( WANTZ ) THEN */ +/* CALL CSCAL( N, CONJG( TMP ), Q( 1, I+1 ), 1 ) */ +/* END IF */ +/* L60: */ + } + } else { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = abofdpos + i__ * ab_dim1; + tmp.r = ab[i__2].r, tmp.i = ab[i__2].i; + abstmp = c_abs(&tmp); + i__2 = abofdpos + i__ * ab_dim1; + ab[i__2].r = abstmp, ab[i__2].i = 0.f; + e[i__] = abstmp; + if (abstmp != 0.f) { + q__1.r = tmp.r / abstmp, q__1.i = tmp.i / abstmp; + tmp.r = q__1.r, tmp.i = q__1.i; + } else { + tmp.r = 1.f, tmp.i = 0.f; + } + if (i__ < *n - 1) { + i__2 = abofdpos + (i__ + 1) * ab_dim1; + i__3 = abofdpos + (i__ + 1) * ab_dim1; + q__1.r = ab[i__3].r * tmp.r - ab[i__3].i * tmp.i, q__1.i = + ab[i__3].r * tmp.i + ab[i__3].i * tmp.r; + ab[i__2].r = q__1.r, ab[i__2].i = q__1.i; + } +/* IF( WANTQ ) THEN */ +/* CALL CSCAL( N, TMP, Q( 1, I+1 ), 1 ) */ +/* END IF */ +/* L70: */ + } + } + + hous[1].r = 1.f, hous[1].i = 0.f; + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + +/* Main code start here. */ +/* Reduce the hermitian band of A to a tridiagonal matrix. */ + + thgrsiz = *n; + grsiz = 1; + shift = 3; + r__1 = (real) (*n) / (real) (*kd) + .5f; + nbtiles = r_int(&r__1); + r__1 = (real) shift / (real) grsiz + .5f; + stepercol = r_int(&r__1); + r__1 = (real) (*n - 1) / (real) thgrsiz + .5f; + thgrnb = r_int(&r__1); + + i__1 = *kd + 1; + clacpy_("A", &i__1, n, &ab[ab_offset], ldab, &work[apos], &lda) + ; + claset_("A", kd, n, &c_b1, &c_b1, &work[awpos], &lda); + + +/* openMP parallelisation start here */ + + +/* main bulge chasing loop */ + + i__1 = thgrnb; + for (thgrid = 1; thgrid <= i__1; ++thgrid) { + stt = (thgrid - 1) * thgrsiz + 1; +/* Computing MIN */ + i__2 = stt + thgrsiz - 1, i__3 = *n - 1; + thed = f2cmin(i__2,i__3); + i__2 = *n - 1; + for (i__ = stt; i__ <= i__2; ++i__) { + ed = f2cmin(i__,thed); + if (stt > ed) { + myexit_(); + } + i__3 = stepercol; + for (m = 1; m <= i__3; ++m) { + st = stt; + i__4 = ed; + for (sweepid = st; sweepid <= i__4; ++sweepid) { + i__5 = grsiz; + for (k = 1; k <= i__5; ++k) { + myid = (i__ - sweepid) * (stepercol * grsiz) + (m - 1) + * grsiz + k; + if (myid == 1) { + ttype = 1; + } else { + ttype = myid % 2 + 2; + } + if (ttype == 2) { + colpt = myid / 2 * *kd + sweepid; + stind = colpt - *kd + 1; + edind = f2cmin(colpt,*n); + blklastind = colpt; + } else { + colpt = (myid + 1) / 2 * *kd + sweepid; + stind = colpt - *kd + 1; + edind = f2cmin(colpt,*n); + if (stind >= edind - 1 && edind == *n) { + blklastind = *n; + } else { + blklastind = 0; + } + } + +/* Call the kernel */ + + chb2st_kernels_(uplo, &wantq, &ttype, &stind, &edind, + &sweepid, n, kd, &ib, &work[inda], &lda, & + hous[indv], &hous[indtau], &ldv, &work[indw + + tid * *kd]); + if (blklastind >= *n - 1) { + ++stt; + myexit_(); + } +/* L140: */ + } +/* L130: */ + } +/* L120: */ + } +/* L110: */ + } +/* L100: */ + } + + +/* Copy the diagonal from A to D. Note that D is REAL thus only */ +/* the Real part is needed, the imaginary part should be zero. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = dpos + (i__ - 1) * lda; + d__[i__] = work[i__2].r; +/* L150: */ + } + +/* Copy the off diagonal from A to E. Note that E is REAL thus only */ +/* the Real part is needed, the imaginary part should be zero. */ + + if (upper) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ofdpos + i__ * lda; + e[i__] = work[i__2].r; +/* L160: */ + } + } else { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ofdpos + (i__ - 1) * lda; + e[i__] = work[i__2].r; +/* L170: */ + } + } + + hous[1].r = (real) lhmin, hous[1].i = 0.f; + work[1].r = (real) lwmin, work[1].i = 0.f; + return 0; + +/* End of CHETRD_HB2ST */ + +} /* chetrd_hb2st__ */ + diff --git a/lapack-netlib/SRC/chetrd_he2hb.c b/lapack-netlib/SRC/chetrd_he2hb.c new file mode 100644 index 000000000..fe1176013 --- /dev/null +++ b/lapack-netlib/SRC/chetrd_he2hb.c @@ -0,0 +1,962 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CHETRD_HE2HB */ + +/* @generated from zhetrd_he2hb.f, fortran z -> c, Wed Dec 7 08:22:40 2016 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CHETRD_HE2HB + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, */ +/* WORK, LWORK, INFO ) */ + +/* IMPLICIT NONE */ + +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LDAB, LWORK, N, KD */ +/* COMPLEX A( LDA, * ), AB( LDAB, * ), */ +/* TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian */ +/* > band-diagonal form AB by a unitary similarity transformation: */ +/* > Q**H * A * Q = AB. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KD */ +/* > \verbatim */ +/* > KD is INTEGER */ +/* > The number of superdiagonals of the reduced matrix if UPLO = 'U', */ +/* > or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ +/* > The reduced matrix is stored in the array AB. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > On exit, if UPLO = 'U', the diagonal and first superdiagonal */ +/* > of A are overwritten by the corresponding elements of the */ +/* > tridiagonal matrix T, and the elements above the first */ +/* > superdiagonal, with the array TAU, represent the unitary */ +/* > matrix Q as a product of elementary reflectors; if UPLO */ +/* > = 'L', the diagonal and first subdiagonal of A are over- */ +/* > written by the corresponding elements of the tridiagonal */ +/* > matrix T, and the elements below the first subdiagonal, with */ +/* > the array TAU, represent the unitary matrix Q as a product */ +/* > of elementary reflectors. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] AB */ +/* > \verbatim */ +/* > AB is COMPLEX array, dimension (LDAB,N) */ +/* > On exit, the upper or lower triangle of the Hermitian band */ +/* > matrix A, stored in the first KD+1 rows of the array. The */ +/* > j-th column of A is stored in the j-th column of the array AB */ +/* > as follows: */ +/* > if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */ +/* > if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=f2cmin(n,j+kd). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KD+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX array, dimension (N-KD) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX array, dimension (LWORK) */ +/* > On exit, if INFO = 0, or if LWORK=-1, */ +/* > WORK(1) returns the size of LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK which should be calculated */ +/* > by a workspace query. LWORK = MAX(1, LWORK_QUERY) */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > LWORK_QUERY = N*KD + N*f2cmax(KD,FACTOPTNB) + 2*KD*KD */ +/* > where FACTOPTNB is the blocking used by the QR or LQ */ +/* > algorithm, usually FACTOPTNB=128 is a good choice otherwise */ +/* > putting LWORK=-1 will provide the size of WORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup complexHEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Implemented by Azzam Haidar. */ +/* > */ +/* > All details are available on technical report, SC11, SC13 papers. */ +/* > */ +/* > Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */ +/* > Parallel reduction to condensed forms for symmetric eigenvalue problems */ +/* > using aggregated fine-grained and memory-aware kernels. In Proceedings */ +/* > of 2011 International Conference for High Performance Computing, */ +/* > Networking, Storage and Analysis (SC '11), New York, NY, USA, */ +/* > Article 8 , 11 pages. */ +/* > http://doi.acm.org/10.1145/2063384.2063394 */ +/* > */ +/* > A. Haidar, J. Kurzak, P. Luszczek, 2013. */ +/* > An improved parallel singular value algorithm and its implementation */ +/* > for multicore hardware, In Proceedings of 2013 International Conference */ +/* > for High Performance Computing, Networking, Storage and Analysis (SC '13). */ +/* > Denver, Colorado, USA, 2013. */ +/* > Article 90, 12 pages. */ +/* > http://doi.acm.org/10.1145/2503210.2503292 */ +/* > */ +/* > A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */ +/* > A novel hybrid CPU-GPU generalized eigensolver for electronic structure */ +/* > calculations based on fine-grained memory aware tasks. */ +/* > International Journal of High Performance Computing Applications. */ +/* > Volume 28 Issue 2, Pages 196-209, May 2014. */ +/* > http://hpc.sagepub.com/content/28/2/196 */ +/* > */ +/* > \endverbatim */ +/* > */ +/* > \verbatim */ +/* > */ +/* > If UPLO = 'U', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd. */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in */ +/* > A(i,i+kd+1:n), and tau in TAU(i). */ +/* > */ +/* > If UPLO = 'L', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = n-kd. */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in */ +/* > A(i+kd+2:n,i), and tau in TAU(i). */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples */ +/* > with n = 5: */ +/* > */ +/* > if UPLO = 'U': if UPLO = 'L': */ +/* > */ +/* > ( ab ab/v1 v1 v1 v1 ) ( ab ) */ +/* > ( ab ab/v2 v2 v2 ) ( ab/v1 ab ) */ +/* > ( ab ab/v3 v3 ) ( v1 ab/v2 ab ) */ +/* > ( ab ab/v4 ) ( v1 v2 ab/v3 ab ) */ +/* > ( ab ) ( v1 v2 v3 ab/v4 ab ) */ +/* > */ +/* > where d and e denote diagonal and off-diagonal elements of T, and vi */ +/* > denotes an element of the vector defining H(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int chetrd_he2hb_(char *uplo, integer *n, integer *kd, + complex *a, integer *lda, complex *ab, integer *ldab, complex *tau, + complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, ab_dim1, ab_offset, i__1, i__2, i__3, i__4, + i__5; + complex q__1; + + /* Local variables */ + extern integer ilaenv2stage_(integer *, char *, char *, integer *, + integer *, integer *, integer *); + integer tpos, wpos, s1pos, s2pos, i__, j; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *), chemm_(char *, + char *, integer *, integer *, complex *, complex *, integer *, + complex *, integer *, complex *, complex *, integer *); + extern logical lsame_(char *, char *); + integer iinfo; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *); + integer lwmin; + logical upper; + extern /* Subroutine */ int cher2k_(char *, char *, integer *, integer *, + complex *, complex *, integer *, complex *, integer *, real *, + complex *, integer *); + integer lk, pk, pn, lt; + extern /* Subroutine */ int cgelqf_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *, integer *); + integer lw; + extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *, integer *), clarft_( + char *, char *, integer *, integer *, complex *, integer *, + complex *, complex *, integer *), claset_(char *, + integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *, ftnlen); + integer ls1; + logical lquery; + integer ls2, ldt, ldw, lds1, lds2; + + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Determine the minimal workspace size required */ +/* and test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + lwmin = ilaenv2stage_(&c__4, "CHETRD_HE2HB", "", n, kd, &c_n1, &c_n1); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kd < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = *kd + 1; + if (*ldab < f2cmax(i__1,i__2)) { + *info = -7; + } else if (*lwork < lwmin && ! lquery) { + *info = -10; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHETRD_HE2HB", &i__1, (ftnlen)12); + return 0; + } else if (lquery) { + work[1].r = (real) lwmin, work[1].i = 0.f; + return 0; + } + +/* Quick return if possible */ +/* Copy the upper/lower portion of A into AB */ + + if (*n <= *kd + 1) { + if (upper) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MIN */ + i__2 = *kd + 1; + lk = f2cmin(i__2,i__); + ccopy_(&lk, &a[i__ - lk + 1 + i__ * a_dim1], &c__1, &ab[*kd + + 1 - lk + 1 + i__ * ab_dim1], &c__1); +/* L100: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MIN */ + i__2 = *kd + 1, i__3 = *n - i__ + 1; + lk = f2cmin(i__2,i__3); + ccopy_(&lk, &a[i__ + i__ * a_dim1], &c__1, &ab[i__ * ab_dim1 + + 1], &c__1); +/* L110: */ + } + } + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + +/* Determine the pointer position for the workspace */ + + ldt = *kd; + lds1 = *kd; + lt = ldt * *kd; + lw = *n * *kd; + ls1 = lds1 * *kd; + ls2 = lwmin - lt - lw - ls1; +/* LS2 = N*MAX(KD,FACTOPTNB) */ + tpos = 1; + wpos = tpos + lt; + s1pos = wpos + lw; + s2pos = s1pos + ls1; + if (upper) { + ldw = *kd; + lds2 = *kd; + } else { + ldw = *n; + lds2 = *n; + } + + +/* Set the workspace of the triangular matrix T to zero once such a */ +/* way every time T is generated the upper/lower portion will be always zero */ + + claset_("A", &ldt, kd, &c_b1, &c_b1, &work[tpos], &ldt); + + if (upper) { + i__1 = *n - *kd; + i__2 = *kd; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + pn = *n - i__ - *kd + 1; +/* Computing MIN */ + i__3 = *n - i__ - *kd + 1; + pk = f2cmin(i__3,*kd); + +/* Compute the LQ factorization of the current block */ + + cgelqf_(kd, &pn, &a[i__ + (i__ + *kd) * a_dim1], lda, &tau[i__], & + work[s2pos], &ls2, &iinfo); + +/* Copy the upper portion of A into AB */ + + i__3 = i__ + pk - 1; + for (j = i__; j <= i__3; ++j) { +/* Computing MIN */ + i__4 = *kd, i__5 = *n - j; + lk = f2cmin(i__4,i__5) + 1; + i__4 = *ldab - 1; + ccopy_(&lk, &a[j + j * a_dim1], lda, &ab[*kd + 1 + j * + ab_dim1], &i__4); +/* L20: */ + } + + claset_("Lower", &pk, &pk, &c_b1, &c_b2, &a[i__ + (i__ + *kd) * + a_dim1], lda); + +/* Form the matrix T */ + + clarft_("Forward", "Rowwise", &pn, &pk, &a[i__ + (i__ + *kd) * + a_dim1], lda, &tau[i__], &work[tpos], &ldt); + +/* Compute W: */ + + cgemm_("Conjugate", "No transpose", &pk, &pn, &pk, &c_b2, &work[ + tpos], &ldt, &a[i__ + (i__ + *kd) * a_dim1], lda, &c_b1, & + work[s2pos], &lds2); + + chemm_("Right", uplo, &pk, &pn, &c_b2, &a[i__ + *kd + (i__ + *kd) + * a_dim1], lda, &work[s2pos], &lds2, &c_b1, &work[wpos], & + ldw); + + cgemm_("No transpose", "Conjugate", &pk, &pk, &pn, &c_b2, &work[ + wpos], &ldw, &work[s2pos], &lds2, &c_b1, &work[s1pos], & + lds1); + + q__1.r = -.5f, q__1.i = 0.f; + cgemm_("No transpose", "No transpose", &pk, &pn, &pk, &q__1, & + work[s1pos], &lds1, &a[i__ + (i__ + *kd) * a_dim1], lda, & + c_b2, &work[wpos], &ldw); + + +/* Update the unreduced submatrix A(i+kd:n,i+kd:n), using */ +/* an update of the form: A := A - V'*W - W'*V */ + + q__1.r = -1.f, q__1.i = 0.f; + cher2k_(uplo, "Conjugate", &pn, &pk, &q__1, &a[i__ + (i__ + *kd) * + a_dim1], lda, &work[wpos], &ldw, &c_b33, &a[i__ + *kd + ( + i__ + *kd) * a_dim1], lda); +/* L10: */ + } + +/* Copy the upper band to AB which is the band storage matrix */ + + i__2 = *n; + for (j = *n - *kd + 1; j <= i__2; ++j) { +/* Computing MIN */ + i__1 = *kd, i__3 = *n - j; + lk = f2cmin(i__1,i__3) + 1; + i__1 = *ldab - 1; + ccopy_(&lk, &a[j + j * a_dim1], lda, &ab[*kd + 1 + j * ab_dim1], & + i__1); +/* L30: */ + } + + } else { + +/* Reduce the lower triangle of A to lower band matrix */ + + i__2 = *n - *kd; + i__1 = *kd; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + pn = *n - i__ - *kd + 1; +/* Computing MIN */ + i__3 = *n - i__ - *kd + 1; + pk = f2cmin(i__3,*kd); + +/* Compute the QR factorization of the current block */ + + cgeqrf_(&pn, kd, &a[i__ + *kd + i__ * a_dim1], lda, &tau[i__], & + work[s2pos], &ls2, &iinfo); + +/* Copy the upper portion of A into AB */ + + i__3 = i__ + pk - 1; + for (j = i__; j <= i__3; ++j) { +/* Computing MIN */ + i__4 = *kd, i__5 = *n - j; + lk = f2cmin(i__4,i__5) + 1; + ccopy_(&lk, &a[j + j * a_dim1], &c__1, &ab[j * ab_dim1 + 1], & + c__1); +/* L50: */ + } + + claset_("Upper", &pk, &pk, &c_b1, &c_b2, &a[i__ + *kd + i__ * + a_dim1], lda); + +/* Form the matrix T */ + + clarft_("Forward", "Columnwise", &pn, &pk, &a[i__ + *kd + i__ * + a_dim1], lda, &tau[i__], &work[tpos], &ldt); + +/* Compute W: */ + + cgemm_("No transpose", "No transpose", &pn, &pk, &pk, &c_b2, &a[ + i__ + *kd + i__ * a_dim1], lda, &work[tpos], &ldt, &c_b1, + &work[s2pos], &lds2); + + chemm_("Left", uplo, &pn, &pk, &c_b2, &a[i__ + *kd + (i__ + *kd) * + a_dim1], lda, &work[s2pos], &lds2, &c_b1, &work[wpos], & + ldw); + + cgemm_("Conjugate", "No transpose", &pk, &pk, &pn, &c_b2, &work[ + s2pos], &lds2, &work[wpos], &ldw, &c_b1, &work[s1pos], & + lds1); + + q__1.r = -.5f, q__1.i = 0.f; + cgemm_("No transpose", "No transpose", &pn, &pk, &pk, &q__1, &a[ + i__ + *kd + i__ * a_dim1], lda, &work[s1pos], &lds1, & + c_b2, &work[wpos], &ldw); + + +/* Update the unreduced submatrix A(i+kd:n,i+kd:n), using */ +/* an update of the form: A := A - V*W' - W*V' */ + + q__1.r = -1.f, q__1.i = 0.f; + cher2k_(uplo, "No transpose", &pn, &pk, &q__1, &a[i__ + *kd + i__ + * a_dim1], lda, &work[wpos], &ldw, &c_b33, &a[i__ + *kd + + (i__ + *kd) * a_dim1], lda); +/* ================================================================== */ +/* RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED */ +/* DO 45 J = I, I+PK-1 */ +/* LK = MIN( KD, N-J ) + 1 */ +/* CALL CCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 ) */ +/* 45 CONTINUE */ +/* ================================================================== */ +/* L40: */ + } + +/* Copy the lower band to AB which is the band storage matrix */ + + i__1 = *n; + for (j = *n - *kd + 1; j <= i__1; ++j) { +/* Computing MIN */ + i__2 = *kd, i__3 = *n - j; + lk = f2cmin(i__2,i__3) + 1; + ccopy_(&lk, &a[j + j * a_dim1], &c__1, &ab[j * ab_dim1 + 1], & + c__1); +/* L60: */ + } + } + + work[1].r = (real) lwmin, work[1].i = 0.f; + return 0; + +/* End of CHETRD_HE2HB */ + +} /* chetrd_he2hb__ */ +