Add C versions as fallback

4 years ago · 8efa762512
--- a/lapack-netlib/SRC/cggbal.c
+++ b/lapack-netlib/SRC/cggbal.c
--- a/lapack-netlib/SRC/cgges.c
+++ b/lapack-netlib/SRC/cgges.c
--- a/lapack-netlib/SRC/cgges3.c
+++ b/lapack-netlib/SRC/cgges3.c
--- a/lapack-netlib/SRC/cggesx.c
+++ b/lapack-netlib/SRC/cggesx.c
--- a/lapack-netlib/SRC/cggev.c
+++ b/lapack-netlib/SRC/cggev.c
--- a/lapack-netlib/SRC/cggev3.c
+++ b/lapack-netlib/SRC/cggev3.c
--- a/lapack-netlib/SRC/cggevx.c
+++ b/lapack-netlib/SRC/cggevx.c
--- a/lapack-netlib/SRC/cggglm.c
+++ b/lapack-netlib/SRC/cggglm.c
@@ -0,0 +1,798 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static complex c_b2 = {1.f,0.f};
 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b CGGGLM */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CGGGLM + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cggglm.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cggglm.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cggglm.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, */
 /*                          INFO ) */

 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, P */
 /*       COMPLEX            A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), */
 /*      $                   X( * ), Y( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CGGGLM solves a general Gauss-Markov linear model (GLM) problem: */
 /* > */
 /* >         minimize || y ||_2   subject to   d = A*x + B*y */
 /* >             x */
 /* > */
 /* > where A is an N-by-M matrix, B is an N-by-P matrix, and d is a */
 /* > given N-vector. It is assumed that M <= N <= M+P, and */
 /* > */
 /* >            rank(A) = M    and    rank( A B ) = N. */
 /* > */
 /* > Under these assumptions, the constrained equation is always */
 /* > consistent, and there is a unique solution x and a minimal 2-norm */
 /* > solution y, which is obtained using a generalized QR factorization */
 /* > of the matrices (A, B) given by */
 /* > */
 /* >    A = Q*(R),   B = Q*T*Z. */
 /* >          (0) */
 /* > */
 /* > In particular, if matrix B is square nonsingular, then the problem */
 /* > GLM is equivalent to the following weighted linear least squares */
 /* > problem */
 /* > */
 /* >              minimize || inv(B)*(d-A*x) ||_2 */
 /* >                  x */
 /* > */
 /* > where inv(B) denotes the inverse of B. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of rows of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of columns of the matrix A.  0 <= M <= N. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] P */
 /* > \verbatim */
 /* >          P is INTEGER */
 /* >          The number of columns of the matrix B.  P >= N-M. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,M) */
 /* >          On entry, the N-by-M matrix A. */
 /* >          On exit, the upper triangular part of the array A contains */
 /* >          the M-by-M upper triangular matrix R. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,P) */
 /* >          On entry, the N-by-P matrix B. */
 /* >          On exit, if N <= P, the upper triangle of the subarray */
 /* >          B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */
 /* >          if N > P, the elements on and above the (N-P)th subdiagonal */
 /* >          contain the N-by-P upper trapezoidal matrix T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] D */
 /* > \verbatim */
 /* >          D is COMPLEX array, dimension (N) */
 /* >          On entry, D is the left hand side of the GLM equation. */
 /* >          On exit, D is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] X */
 /* > \verbatim */
 /* >          X is COMPLEX array, dimension (M) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Y */
 /* > \verbatim */
 /* >          Y is COMPLEX array, dimension (P) */
 /* > */
 /* >          On exit, X and Y are the solutions of the GLM problem. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. LWORK >= f2cmax(1,N+M+P). */
 /* >          For optimum performance, LWORK >= M+f2cmin(N,P)+f2cmax(N,P)*NB, */
 /* >          where NB is an upper bound for the optimal blocksizes for */
 /* >          CGEQRF, CGERQF, CUNMQR and CUNMRQ. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          = 1:  the upper triangular factor R associated with A in the */
 /* >                generalized QR factorization of the pair (A, B) is */
 /* >                singular, so that rank(A) < M; the least squares */
 /* >                solution could not be computed. */
 /* >          = 2:  the bottom (N-M) by (N-M) part of the upper trapezoidal */
 /* >                factor T associated with B in the generalized QR */
 /* >                factorization of the pair (A, B) is singular, so that */
 /* >                rank( A B ) < N; the least squares solution could not */
 /* >                be computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexOTHEReigen */

 /*  ===================================================================== */
 /* Subroutine */ int cggglm_(integer *n, integer *m, integer *p, complex *a, 
 	integer *lda, complex *b, integer *ldb, complex *d__, complex *x, 
 	complex *y, complex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
    complex q__1;

    /* Local variables */
    integer lopt, i__;
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
 	    , complex *, integer *, complex *, integer *, complex *, complex *
 	    , integer *), ccopy_(integer *, complex *, integer *, 
 	    complex *, integer *);
    integer nb, np;
    extern /* Subroutine */ int cggqrf_(integer *, integer *, integer *, 
 	    complex *, integer *, complex *, complex *, integer *, complex *, 
 	    complex *, integer *, integer *), xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer lwkmin, nb1, nb2, nb3, nb4;
    extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, 
 	    integer *, complex *, integer *, complex *, complex *, integer *, 
 	    complex *, integer *, integer *), cunmrq_(char *, 
 	    char *, integer *, integer *, integer *, complex *, integer *, 
 	    complex *, complex *, integer *, complex *, integer *, integer *);
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int ctrtrs_(char *, char *, char *, integer *, 
 	    integer *, complex *, integer *, complex *, integer *, integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  =================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --d__;
    --x;
    --y;
    --work;

    /* Function Body */
    *info = 0;
    np = f2cmin(*n,*p);
    lquery = *lwork == -1;
    if (*n < 0) {
 	*info = -1;
    } else if (*m < 0 || *m > *n) {
 	*info = -2;
    } else if (*p < 0 || *p < *n - *m) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -7;
    }

 /*     Calculate workspace */

    if (*info == 0) {
 	if (*n == 0) {
 	    lwkmin = 1;
 	    lwkopt = 1;
 	} else {
 	    nb1 = ilaenv_(&c__1, "CGEQRF", " ", n, m, &c_n1, &c_n1, (ftnlen)6,
 		     (ftnlen)1);
 	    nb2 = ilaenv_(&c__1, "CGERQF", " ", n, m, &c_n1, &c_n1, (ftnlen)6,
 		     (ftnlen)1);
 	    nb3 = ilaenv_(&c__1, "CUNMQR", " ", n, m, p, &c_n1, (ftnlen)6, (
 		    ftnlen)1);
 	    nb4 = ilaenv_(&c__1, "CUNMRQ", " ", n, m, p, &c_n1, (ftnlen)6, (
 		    ftnlen)1);
 /* Computing MAX */
 	    i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3);
 	    nb = f2cmax(i__1,nb4);
 	    lwkmin = *m + *n + *p;
 	    lwkopt = *m + np + f2cmax(*n,*p) * nb;
 	}
 	work[1].r = (real) lwkopt, work[1].i = 0.f;

 	if (*lwork < lwkmin && ! lquery) {
 	    *info = -12;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CGGGLM", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	i__1 = *m;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = i__;
 	    x[i__2].r = 0.f, x[i__2].i = 0.f;
 	}
 	i__1 = *p;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = i__;
 	    y[i__2].r = 0.f, y[i__2].i = 0.f;
 	}
 	return 0;
    }

 /*     Compute the GQR factorization of matrices A and B: */

 /*          Q**H*A = ( R11 ) M,    Q**H*B*Z**H = ( T11   T12 ) M */
 /*                   (  0  ) N-M                 (  0    T22 ) N-M */
 /*                      M                         M+P-N  N-M */

 /*     where R11 and T22 are upper triangular, and Q and Z are */
 /*     unitary. */

    i__1 = *lwork - *m - np;
    cggqrf_(n, m, p, &a[a_offset], lda, &work[1], &b[b_offset], ldb, &work[*m 
 	    + 1], &work[*m + np + 1], &i__1, info);
    i__1 = *m + np + 1;
    lopt = work[i__1].r;

 /*     Update left-hand-side vector d = Q**H*d = ( d1 ) M */
 /*                                               ( d2 ) N-M */

    i__1 = f2cmax(1,*n);
    i__2 = *lwork - *m - np;
    cunmqr_("Left", "Conjugate transpose", n, &c__1, m, &a[a_offset], lda, &
 	    work[1], &d__[1], &i__1, &work[*m + np + 1], &i__2, info);
 /* Computing MAX */
    i__3 = *m + np + 1;
    i__1 = lopt, i__2 = (integer) work[i__3].r;
    lopt = f2cmax(i__1,i__2);

 /*     Solve T22*y2 = d2 for y2 */

    if (*n > *m) {
 	i__1 = *n - *m;
 	i__2 = *n - *m;
 	ctrtrs_("Upper", "No transpose", "Non unit", &i__1, &c__1, &b[*m + 1 
 		+ (*m + *p - *n + 1) * b_dim1], ldb, &d__[*m + 1], &i__2, 
 		info);

 	if (*info > 0) {
 	    *info = 1;
 	    return 0;
 	}

 	i__1 = *n - *m;
 	ccopy_(&i__1, &d__[*m + 1], &c__1, &y[*m + *p - *n + 1], &c__1);
    }

 /*     Set y1 = 0 */

    i__1 = *m + *p - *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	i__2 = i__;
 	y[i__2].r = 0.f, y[i__2].i = 0.f;
 /* L10: */
    }

 /*     Update d1 = d1 - T12*y2 */

    i__1 = *n - *m;
    q__1.r = -1.f, q__1.i = 0.f;
    cgemv_("No transpose", m, &i__1, &q__1, &b[(*m + *p - *n + 1) * b_dim1 + 
 	    1], ldb, &y[*m + *p - *n + 1], &c__1, &c_b2, &d__[1], &c__1);

 /*     Solve triangular system: R11*x = d1 */

    if (*m > 0) {
 	ctrtrs_("Upper", "No Transpose", "Non unit", m, &c__1, &a[a_offset], 
 		lda, &d__[1], m, info);

 	if (*info > 0) {
 	    *info = 2;
 	    return 0;
 	}

 /*        Copy D to X */

 	ccopy_(m, &d__[1], &c__1, &x[1], &c__1);
    }

 /*     Backward transformation y = Z**H *y */

 /* Computing MAX */
    i__1 = 1, i__2 = *n - *p + 1;
    i__3 = f2cmax(1,*p);
    i__4 = *lwork - *m - np;
    cunmrq_("Left", "Conjugate transpose", p, &c__1, &np, &b[f2cmax(i__1,i__2) + 
 	    b_dim1], ldb, &work[*m + 1], &y[1], &i__3, &work[*m + np + 1], &
 	    i__4, info);
 /* Computing MAX */
    i__4 = *m + np + 1;
    i__2 = lopt, i__3 = (integer) work[i__4].r;
    i__1 = *m + np + f2cmax(i__2,i__3);
    work[1].r = (real) i__1, work[1].i = 0.f;

    return 0;

 /*     End of CGGGLM */

 } /* cggglm_ */

--- a/lapack-netlib/SRC/cgghd3.c
+++ b/lapack-netlib/SRC/cgghd3.c
--- a/lapack-netlib/SRC/cgghrd.c
+++ b/lapack-netlib/SRC/cgghrd.c
@@ -0,0 +1,788 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static complex c_b1 = {1.f,0.f};
 static complex c_b2 = {0.f,0.f};
 static integer c__1 = 1;

 /* > \brief \b CGGHRD */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CGGHRD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgghrd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgghrd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgghrd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, */
 /*                          LDQ, Z, LDZ, INFO ) */

 /*       CHARACTER          COMPQ, COMPZ */
 /*       INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N */
 /*       COMPLEX            A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */
 /*      $                   Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CGGHRD reduces a pair of complex matrices (A,B) to generalized upper */
 /* > Hessenberg form using unitary transformations, where A is a */
 /* > general matrix and B is upper triangular.  The form of the generalized */
 /* > eigenvalue problem is */
 /* >    A*x = lambda*B*x, */
 /* > and B is typically made upper triangular by computing its QR */
 /* > factorization and moving the unitary matrix Q to the left side */
 /* > of the equation. */
 /* > */
 /* > This subroutine simultaneously reduces A to a Hessenberg matrix H: */
 /* >    Q**H*A*Z = H */
 /* > and transforms B to another upper triangular matrix T: */
 /* >    Q**H*B*Z = T */
 /* > in order to reduce the problem to its standard form */
 /* >    H*y = lambda*T*y */
 /* > where y = Z**H*x. */
 /* > */
 /* > The unitary matrices Q and Z are determined as products of Givens */
 /* > rotations.  They may either be formed explicitly, or they may be */
 /* > postmultiplied into input matrices Q1 and Z1, so that */
 /* >      Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H */
 /* >      Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H */
 /* > If Q1 is the unitary matrix from the QR factorization of B in the */
 /* > original equation A*x = lambda*B*x, then CGGHRD reduces the original */
 /* > problem to generalized Hessenberg form. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] COMPQ */
 /* > \verbatim */
 /* >          COMPQ is CHARACTER*1 */
 /* >          = 'N': do not compute Q; */
 /* >          = 'I': Q is initialized to the unit matrix, and the */
 /* >                 unitary matrix Q is returned; */
 /* >          = 'V': Q must contain a unitary matrix Q1 on entry, */
 /* >                 and the product Q1*Q is returned. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] COMPZ */
 /* > \verbatim */
 /* >          COMPZ is CHARACTER*1 */
 /* >          = 'N': do not compute Z; */
 /* >          = 'I': Z is initialized to the unit matrix, and the */
 /* >                 unitary matrix Z is returned; */
 /* >          = 'V': Z must contain a unitary matrix Z1 on entry, */
 /* >                 and the product Z1*Z is returned. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ILO */
 /* > \verbatim */
 /* >          ILO is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IHI */
 /* > \verbatim */
 /* >          IHI is INTEGER */
 /* > */
 /* >          ILO and IHI mark the rows and columns of A which are to be */
 /* >          reduced.  It is assumed that A is already upper triangular */
 /* >          in rows and columns 1:ILO-1 and IHI+1:N.  ILO and IHI are */
 /* >          normally set by a previous call to CGGBAL; otherwise they */
 /* >          should be set to 1 and N respectively. */
 /* >          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA, N) */
 /* >          On entry, the N-by-N general matrix to be reduced. */
 /* >          On exit, the upper triangle and the first subdiagonal of A */
 /* >          are overwritten with the upper Hessenberg matrix H, and the */
 /* >          rest is set to zero. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB, N) */
 /* >          On entry, the N-by-N upper triangular matrix B. */
 /* >          On exit, the upper triangular matrix T = Q**H B Z.  The */
 /* >          elements below the diagonal are set to zero. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] Q */
 /* > \verbatim */
 /* >          Q is COMPLEX array, dimension (LDQ, N) */
 /* >          On entry, if COMPQ = 'V', the unitary matrix Q1, typically */
 /* >          from the QR factorization of B. */
 /* >          On exit, if COMPQ='I', the unitary matrix Q, and if */
 /* >          COMPQ = 'V', the product Q1*Q. */
 /* >          Not referenced if COMPQ='N'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDQ */
 /* > \verbatim */
 /* >          LDQ is INTEGER */
 /* >          The leading dimension of the array Q. */
 /* >          LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX array, dimension (LDZ, N) */
 /* >          On entry, if COMPZ = 'V', the unitary matrix Z1. */
 /* >          On exit, if COMPZ='I', the unitary matrix Z, and if */
 /* >          COMPZ = 'V', the product Z1*Z. */
 /* >          Not referenced if COMPZ='N'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z. */
 /* >          LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexOTHERcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  This routine reduces A to Hessenberg and B to triangular form by */
 /* >  an unblocked reduction, as described in _Matrix_Computations_, */
 /* >  by Golub and van Loan (Johns Hopkins Press). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int cgghrd_(char *compq, char *compz, integer *n, integer *
 	ilo, integer *ihi, complex *a, integer *lda, complex *b, integer *ldb,
 	 complex *q, integer *ldq, complex *z__, integer *ldz, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, 
 	    z_offset, i__1, i__2, i__3;
    complex q__1;

    /* Local variables */
    integer jcol;
    extern /* Subroutine */ int crot_(integer *, complex *, integer *, 
 	    complex *, integer *, real *, complex *);
    integer jrow;
    real c__;
    complex s;
    extern logical lsame_(char *, char *);
    complex ctemp;
    extern /* Subroutine */ int claset_(char *, integer *, integer *, complex 
 	    *, complex *, complex *, integer *), clartg_(complex *, 
 	    complex *, real *, complex *, complex *), xerbla_(char *, integer 
 	    *, ftnlen);
    integer icompq, icompz;
    logical ilq, ilz;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Decode COMPQ */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;

    /* Function Body */
    if (lsame_(compq, "N")) {
 	ilq = FALSE_;
 	icompq = 1;
    } else if (lsame_(compq, "V")) {
 	ilq = TRUE_;
 	icompq = 2;
    } else if (lsame_(compq, "I")) {
 	ilq = TRUE_;
 	icompq = 3;
    } else {
 	icompq = 0;
    }

 /*     Decode COMPZ */

    if (lsame_(compz, "N")) {
 	ilz = FALSE_;
 	icompz = 1;
    } else if (lsame_(compz, "V")) {
 	ilz = TRUE_;
 	icompz = 2;
    } else if (lsame_(compz, "I")) {
 	ilz = TRUE_;
 	icompz = 3;
    } else {
 	icompz = 0;
    }

 /*     Test the input parameters. */

    *info = 0;
    if (icompq <= 0) {
 	*info = -1;
    } else if (icompz <= 0) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*ilo < 1) {
 	*info = -4;
    } else if (*ihi > *n || *ihi < *ilo - 1) {
 	*info = -5;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -7;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -9;
    } else if (ilq && *ldq < *n || *ldq < 1) {
 	*info = -11;
    } else if (ilz && *ldz < *n || *ldz < 1) {
 	*info = -13;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CGGHRD", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Initialize Q and Z if desired. */

    if (icompq == 3) {
 	claset_("Full", n, n, &c_b2, &c_b1, &q[q_offset], ldq);
    }
    if (icompz == 3) {
 	claset_("Full", n, n, &c_b2, &c_b1, &z__[z_offset], ldz);
    }

 /*     Quick return if possible */

    if (*n <= 1) {
 	return 0;
    }

 /*     Zero out lower triangle of B */

    i__1 = *n - 1;
    for (jcol = 1; jcol <= i__1; ++jcol) {
 	i__2 = *n;
 	for (jrow = jcol + 1; jrow <= i__2; ++jrow) {
 	    i__3 = jrow + jcol * b_dim1;
 	    b[i__3].r = 0.f, b[i__3].i = 0.f;
 /* L10: */
 	}
 /* L20: */
    }

 /*     Reduce A and B */

    i__1 = *ihi - 2;
    for (jcol = *ilo; jcol <= i__1; ++jcol) {

 	i__2 = jcol + 2;
 	for (jrow = *ihi; jrow >= i__2; --jrow) {

 /*           Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) */

 	    i__3 = jrow - 1 + jcol * a_dim1;
 	    ctemp.r = a[i__3].r, ctemp.i = a[i__3].i;
 	    clartg_(&ctemp, &a[jrow + jcol * a_dim1], &c__, &s, &a[jrow - 1 + 
 		    jcol * a_dim1]);
 	    i__3 = jrow + jcol * a_dim1;
 	    a[i__3].r = 0.f, a[i__3].i = 0.f;
 	    i__3 = *n - jcol;
 	    crot_(&i__3, &a[jrow - 1 + (jcol + 1) * a_dim1], lda, &a[jrow + (
 		    jcol + 1) * a_dim1], lda, &c__, &s);
 	    i__3 = *n + 2 - jrow;
 	    crot_(&i__3, &b[jrow - 1 + (jrow - 1) * b_dim1], ldb, &b[jrow + (
 		    jrow - 1) * b_dim1], ldb, &c__, &s);
 	    if (ilq) {
 		r_cnjg(&q__1, &s);
 		crot_(n, &q[(jrow - 1) * q_dim1 + 1], &c__1, &q[jrow * q_dim1 
 			+ 1], &c__1, &c__, &q__1);
 	    }

 /*           Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) */

 	    i__3 = jrow + jrow * b_dim1;
 	    ctemp.r = b[i__3].r, ctemp.i = b[i__3].i;
 	    clartg_(&ctemp, &b[jrow + (jrow - 1) * b_dim1], &c__, &s, &b[jrow 
 		    + jrow * b_dim1]);
 	    i__3 = jrow + (jrow - 1) * b_dim1;
 	    b[i__3].r = 0.f, b[i__3].i = 0.f;
 	    crot_(ihi, &a[jrow * a_dim1 + 1], &c__1, &a[(jrow - 1) * a_dim1 + 
 		    1], &c__1, &c__, &s);
 	    i__3 = jrow - 1;
 	    crot_(&i__3, &b[jrow * b_dim1 + 1], &c__1, &b[(jrow - 1) * b_dim1 
 		    + 1], &c__1, &c__, &s);
 	    if (ilz) {
 		crot_(n, &z__[jrow * z_dim1 + 1], &c__1, &z__[(jrow - 1) * 
 			z_dim1 + 1], &c__1, &c__, &s);
 	    }
 /* L30: */
 	}
 /* L40: */
    }

    return 0;

 /*     End of CGGHRD */

 } /* cgghrd_ */

--- a/lapack-netlib/SRC/cgglse.c
+++ b/lapack-netlib/SRC/cgglse.c
@@ -0,0 +1,794 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static complex c_b1 = {1.f,0.f};
 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief <b> CGGLSE solves overdetermined or underdetermined systems for OTHER matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CGGLSE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgglse.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgglse.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgglse.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, */
 /*                          INFO ) */

 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, P */
 /*       COMPLEX            A( LDA, * ), B( LDB, * ), C( * ), D( * ), */
 /*      $                   WORK( * ), X( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CGGLSE solves the linear equality-constrained least squares (LSE) */
 /* > problem: */
 /* > */
 /* >         minimize || c - A*x ||_2   subject to   B*x = d */
 /* > */
 /* > where A is an M-by-N matrix, B is a P-by-N matrix, c is a given */
 /* > M-vector, and d is a given P-vector. It is assumed that */
 /* > P <= N <= M+P, and */
 /* > */
 /* >          rank(B) = P and  rank( (A) ) = N. */
 /* >                               ( (B) ) */
 /* > */
 /* > These conditions ensure that the LSE problem has a unique solution, */
 /* > which is obtained using a generalized RQ factorization of the */
 /* > matrices (B, A) given by */
 /* > */
 /* >    B = (0 R)*Q,   A = Z*T*Q. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrices A and B. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] P */
 /* > \verbatim */
 /* >          P is INTEGER */
 /* >          The number of rows of the matrix B. 0 <= P <= N <= M+P. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the f2cmin(M,N)-by-N upper trapezoidal matrix T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,N) */
 /* >          On entry, the P-by-N matrix B. */
 /* >          On exit, the upper triangle of the subarray B(1:P,N-P+1:N) */
 /* >          contains the P-by-P upper triangular matrix R. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= f2cmax(1,P). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] C */
 /* > \verbatim */
 /* >          C is COMPLEX array, dimension (M) */
 /* >          On entry, C contains the right hand side vector for the */
 /* >          least squares part of the LSE problem. */
 /* >          On exit, the residual sum of squares for the solution */
 /* >          is given by the sum of squares of elements N-P+1 to M of */
 /* >          vector C. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] D */
 /* > \verbatim */
 /* >          D is COMPLEX array, dimension (P) */
 /* >          On entry, D contains the right hand side vector for the */
 /* >          constrained equation. */
 /* >          On exit, D is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] X */
 /* > \verbatim */
 /* >          X is COMPLEX array, dimension (N) */
 /* >          On exit, X is the solution of the LSE problem. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. LWORK >= f2cmax(1,M+N+P). */
 /* >          For optimum performance LWORK >= P+f2cmin(M,N)+f2cmax(M,N)*NB, */
 /* >          where NB is an upper bound for the optimal blocksizes for */
 /* >          CGEQRF, CGERQF, CUNMQR and CUNMRQ. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          = 1:  the upper triangular factor R associated with B in the */
 /* >                generalized RQ factorization of the pair (B, A) is */
 /* >                singular, so that rank(B) < P; the least squares */
 /* >                solution could not be computed. */
 /* >          = 2:  the (N-P) by (N-P) part of the upper trapezoidal factor */
 /* >                T associated with A in the generalized RQ factorization */
 /* >                of the pair (B, A) is singular, so that */
 /* >                rank( (A) ) < N; the least squares solution could not */
 /* >                    ( (B) ) */
 /* >                be computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexOTHERsolve */

 /*  ===================================================================== */
 /* Subroutine */ int cgglse_(integer *m, integer *n, integer *p, complex *a, 
 	integer *lda, complex *b, integer *ldb, complex *c__, complex *d__, 
 	complex *x, complex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
    complex q__1;

    /* Local variables */
    integer lopt;
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
 	    , complex *, integer *, complex *, integer *, complex *, complex *
 	    , integer *), ccopy_(integer *, complex *, integer *, 
 	    complex *, integer *), caxpy_(integer *, complex *, complex *, 
 	    integer *, complex *, integer *), ctrmv_(char *, char *, char *, 
 	    integer *, complex *, integer *, complex *, integer *);
    integer nb, mn, nr;
    extern /* Subroutine */ int cggrqf_(integer *, integer *, integer *, 
 	    complex *, integer *, complex *, complex *, integer *, complex *, 
 	    complex *, integer *, integer *), xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer lwkmin, nb1, nb2, nb3, nb4;
    extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, 
 	    integer *, complex *, integer *, complex *, complex *, integer *, 
 	    complex *, integer *, integer *), cunmrq_(char *, 
 	    char *, integer *, integer *, integer *, complex *, integer *, 
 	    complex *, complex *, integer *, complex *, integer *, integer *);
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int ctrtrs_(char *, char *, char *, integer *, 
 	    integer *, complex *, integer *, complex *, integer *, integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --c__;
    --d__;
    --x;
    --work;

    /* Function Body */
    *info = 0;
    mn = f2cmin(*m,*n);
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*p < 0 || *p > *n || *p < *n - *m) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*p)) {
 	*info = -7;
    }

 /*     Calculate workspace */

    if (*info == 0) {
 	if (*n == 0) {
 	    lwkmin = 1;
 	    lwkopt = 1;
 	} else {
 	    nb1 = ilaenv_(&c__1, "CGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6,
 		     (ftnlen)1);
 	    nb2 = ilaenv_(&c__1, "CGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6,
 		     (ftnlen)1);
 	    nb3 = ilaenv_(&c__1, "CUNMQR", " ", m, n, p, &c_n1, (ftnlen)6, (
 		    ftnlen)1);
 	    nb4 = ilaenv_(&c__1, "CUNMRQ", " ", m, n, p, &c_n1, (ftnlen)6, (
 		    ftnlen)1);
 /* Computing MAX */
 	    i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3);
 	    nb = f2cmax(i__1,nb4);
 	    lwkmin = *m + *n + *p;
 	    lwkopt = *p + mn + f2cmax(*m,*n) * nb;
 	}
 	work[1].r = (real) lwkopt, work[1].i = 0.f;

 	if (*lwork < lwkmin && ! lquery) {
 	    *info = -12;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CGGLSE", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Compute the GRQ factorization of matrices B and A: */

 /*            B*Q**H = (  0  T12 ) P   Z**H*A*Q**H = ( R11 R12 ) N-P */
 /*                        N-P  P                     (  0  R22 ) M+P-N */
 /*                                                      N-P  P */

 /*     where T12 and R11 are upper triangular, and Q and Z are */
 /*     unitary. */

    i__1 = *lwork - *p - mn;
    cggrqf_(p, m, n, &b[b_offset], ldb, &work[1], &a[a_offset], lda, &work[*p 
 	    + 1], &work[*p + mn + 1], &i__1, info);
    i__1 = *p + mn + 1;
    lopt = work[i__1].r;

 /*     Update c = Z**H *c = ( c1 ) N-P */
 /*                       ( c2 ) M+P-N */

    i__1 = f2cmax(1,*m);
    i__2 = *lwork - *p - mn;
    cunmqr_("Left", "Conjugate Transpose", m, &c__1, &mn, &a[a_offset], lda, &
 	    work[*p + 1], &c__[1], &i__1, &work[*p + mn + 1], &i__2, info);
 /* Computing MAX */
    i__3 = *p + mn + 1;
    i__1 = lopt, i__2 = (integer) work[i__3].r;
    lopt = f2cmax(i__1,i__2);

 /*     Solve T12*x2 = d for x2 */

    if (*p > 0) {
 	ctrtrs_("Upper", "No transpose", "Non-unit", p, &c__1, &b[(*n - *p + 
 		1) * b_dim1 + 1], ldb, &d__[1], p, info);

 	if (*info > 0) {
 	    *info = 1;
 	    return 0;
 	}

 /*        Put the solution in X */

 	ccopy_(p, &d__[1], &c__1, &x[*n - *p + 1], &c__1);

 /*        Update c1 */

 	i__1 = *n - *p;
 	q__1.r = -1.f, q__1.i = 0.f;
 	cgemv_("No transpose", &i__1, p, &q__1, &a[(*n - *p + 1) * a_dim1 + 1]
 		, lda, &d__[1], &c__1, &c_b1, &c__[1], &c__1);
    }

 /*     Solve R11*x1 = c1 for x1 */

    if (*n > *p) {
 	i__1 = *n - *p;
 	i__2 = *n - *p;
 	ctrtrs_("Upper", "No transpose", "Non-unit", &i__1, &c__1, &a[
 		a_offset], lda, &c__[1], &i__2, info);

 	if (*info > 0) {
 	    *info = 2;
 	    return 0;
 	}

 /*        Put the solutions in X */

 	i__1 = *n - *p;
 	ccopy_(&i__1, &c__[1], &c__1, &x[1], &c__1);
    }

 /*     Compute the residual vector: */

    if (*m < *n) {
 	nr = *m + *p - *n;
 	if (nr > 0) {
 	    i__1 = *n - *m;
 	    q__1.r = -1.f, q__1.i = 0.f;
 	    cgemv_("No transpose", &nr, &i__1, &q__1, &a[*n - *p + 1 + (*m + 
 		    1) * a_dim1], lda, &d__[nr + 1], &c__1, &c_b1, &c__[*n - *
 		    p + 1], &c__1);
 	}
    } else {
 	nr = *p;
    }
    if (nr > 0) {
 	ctrmv_("Upper", "No transpose", "Non unit", &nr, &a[*n - *p + 1 + (*n 
 		- *p + 1) * a_dim1], lda, &d__[1], &c__1);
 	q__1.r = -1.f, q__1.i = 0.f;
 	caxpy_(&nr, &q__1, &d__[1], &c__1, &c__[*n - *p + 1], &c__1);
    }

 /*     Backward transformation x = Q**H*x */

    i__1 = *lwork - *p - mn;
    cunmrq_("Left", "Conjugate Transpose", n, &c__1, p, &b[b_offset], ldb, &
 	    work[1], &x[1], n, &work[*p + mn + 1], &i__1, info);
 /* Computing MAX */
    i__4 = *p + mn + 1;
    i__2 = lopt, i__3 = (integer) work[i__4].r;
    i__1 = *p + mn + f2cmax(i__2,i__3);
    work[1].r = (real) i__1, work[1].i = 0.f;

    return 0;

 /*     End of CGGLSE */

 } /* cgglse_ */

--- a/lapack-netlib/SRC/cggqrf.c
+++ b/lapack-netlib/SRC/cggqrf.c
@@ -0,0 +1,719 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b CGGQRF */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CGGQRF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cggqrf.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cggqrf.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cggqrf.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, */
 /*                          LWORK, INFO ) */

 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, P */
 /*       COMPLEX            A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), */
 /*      $                   WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CGGQRF computes a generalized QR factorization of an N-by-M matrix A */
 /* > and an N-by-P matrix B: */
 /* > */
 /* >             A = Q*R,        B = Q*T*Z, */
 /* > */
 /* > where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix, */
 /* > and R and T assume one of the forms: */
 /* > */
 /* > if N >= M,  R = ( R11 ) M  ,   or if N < M,  R = ( R11  R12 ) N, */
 /* >                 (  0  ) N-M                         N   M-N */
 /* >                    M */
 /* > */
 /* > where R11 is upper triangular, and */
 /* > */
 /* > if N <= P,  T = ( 0  T12 ) N,   or if N > P,  T = ( T11 ) N-P, */
 /* >                  P-N  N                           ( T21 ) P */
 /* >                                                      P */
 /* > */
 /* > where T12 or T21 is upper triangular. */
 /* > */
 /* > In particular, if B is square and nonsingular, the GQR factorization */
 /* > of A and B implicitly gives the QR factorization of inv(B)*A: */
 /* > */
 /* >              inv(B)*A = Z**H * (inv(T)*R) */
 /* > */
 /* > where inv(B) denotes the inverse of the matrix B, and Z' denotes the */
 /* > conjugate transpose of matrix Z. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of rows of the matrices A and B. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of columns of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] P */
 /* > \verbatim */
 /* >          P is INTEGER */
 /* >          The number of columns of the matrix B.  P >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,M) */
 /* >          On entry, the N-by-M matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the f2cmin(N,M)-by-M upper trapezoidal matrix R (R is */
 /* >          upper triangular if N >= M); the elements below the diagonal, */
 /* >          with the array TAUA, represent the unitary matrix Q as a */
 /* >          product of f2cmin(N,M) elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUA */
 /* > \verbatim */
 /* >          TAUA is COMPLEX array, dimension (f2cmin(N,M)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the unitary matrix Q (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,P) */
 /* >          On entry, the N-by-P matrix B. */
 /* >          On exit, if N <= P, the upper triangle of the subarray */
 /* >          B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */
 /* >          if N > P, the elements on and above the (N-P)-th subdiagonal */
 /* >          contain the N-by-P upper trapezoidal matrix T; the remaining */
 /* >          elements, with the array TAUB, represent the unitary */
 /* >          matrix Z as a product of elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUB */
 /* > \verbatim */
 /* >          TAUB is COMPLEX array, dimension (f2cmin(N,P)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the unitary matrix Z (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. LWORK >= f2cmax(1,N,M,P). */
 /* >          For optimum performance LWORK >= f2cmax(N,M,P)*f2cmax(NB1,NB2,NB3), */
 /* >          where NB1 is the optimal blocksize for the QR factorization */
 /* >          of an N-by-M matrix, NB2 is the optimal blocksize for the */
 /* >          RQ factorization of an N-by-P matrix, and NB3 is the optimal */
 /* >          blocksize for a call of CUNMQR. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >           = 0:  successful exit */
 /* >           < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexOTHERcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(n,m). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - taua * v * v**H */
 /* > */
 /* >  where taua is a complex scalar, and v is a complex vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), */
 /* >  and taua in TAUA(i). */
 /* >  To form Q explicitly, use LAPACK subroutine CUNGQR. */
 /* >  To use Q to update another matrix, use LAPACK subroutine CUNMQR. */
 /* > */
 /* >  The matrix Z is represented as a product of elementary reflectors */
 /* > */
 /* >     Z = H(1) H(2) . . . H(k), where k = f2cmin(n,p). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - taub * v * v**H */
 /* > */
 /* >  where taub is a complex scalar, and v is a complex vector with */
 /* >  v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in */
 /* >  B(n-k+i,1:p-k+i-1), and taub in TAUB(i). */
 /* >  To form Z explicitly, use LAPACK subroutine CUNGRQ. */
 /* >  To use Z to update another matrix, use LAPACK subroutine CUNMRQ. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int cggqrf_(integer *n, integer *m, integer *p, complex *a, 
 	integer *lda, complex *taua, complex *b, integer *ldb, complex *taub, 
 	complex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;

    /* Local variables */
    integer lopt, nb;
    extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *, 
 	    integer *, complex *, complex *, integer *, integer *), cgerqf_(
 	    integer *, integer *, complex *, integer *, complex *, complex *, 
 	    integer *, integer *), xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer nb1, nb2, nb3;
    extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, 
 	    integer *, complex *, integer *, complex *, complex *, integer *, 
 	    complex *, integer *, integer *);
    integer lwkopt;
    logical lquery;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --taua;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --taub;
    --work;

    /* Function Body */
    *info = 0;
    nb1 = ilaenv_(&c__1, "CGEQRF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);
    nb2 = ilaenv_(&c__1, "CGERQF", " ", n, p, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);
    nb3 = ilaenv_(&c__1, "CUNMQR", " ", n, m, p, &c_n1, (ftnlen)6, (ftnlen)1);
 /* Computing MAX */
    i__1 = f2cmax(nb1,nb2);
    nb = f2cmax(i__1,nb3);
 /* Computing MAX */
    i__1 = f2cmax(*n,*m);
    lwkopt = f2cmax(i__1,*p) * nb;
    work[1].r = (real) lwkopt, work[1].i = 0.f;
    lquery = *lwork == -1;
    if (*n < 0) {
 	*info = -1;
    } else if (*m < 0) {
 	*info = -2;
    } else if (*p < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = f2cmax(1,*n), i__1 = f2cmax(i__1,*m);
 	if (*lwork < f2cmax(i__1,*p) && ! lquery) {
 	    *info = -11;
 	}
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CGGQRF", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     QR factorization of N-by-M matrix A: A = Q*R */

    cgeqrf_(n, m, &a[a_offset], lda, &taua[1], &work[1], lwork, info);
    lopt = work[1].r;

 /*     Update B := Q**H*B. */

    i__1 = f2cmin(*n,*m);
    cunmqr_("Left", "Conjugate Transpose", n, p, &i__1, &a[a_offset], lda, &
 	    taua[1], &b[b_offset], ldb, &work[1], lwork, info);
 /* Computing MAX */
    i__1 = lopt, i__2 = (integer) work[1].r;
    lopt = f2cmax(i__1,i__2);

 /*     RQ factorization of N-by-P matrix B: B = T*Z. */

    cgerqf_(n, p, &b[b_offset], ldb, &taub[1], &work[1], lwork, info);
 /* Computing MAX */
    i__2 = lopt, i__3 = (integer) work[1].r;
    i__1 = f2cmax(i__2,i__3);
    work[1].r = (real) i__1, work[1].i = 0.f;

    return 0;

 /*     End of CGGQRF */

 } /* cggqrf_ */

--- a/lapack-netlib/SRC/cggrqf.c
+++ b/lapack-netlib/SRC/cggrqf.c
@@ -0,0 +1,720 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b CGGRQF */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CGGRQF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cggrqf.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cggrqf.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cggrqf.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, */
 /*                          LWORK, INFO ) */

 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, P */
 /*       COMPLEX            A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), */
 /*      $                   WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CGGRQF computes a generalized RQ factorization of an M-by-N matrix A */
 /* > and a P-by-N matrix B: */
 /* > */
 /* >             A = R*Q,        B = Z*T*Q, */
 /* > */
 /* > where Q is an N-by-N unitary matrix, Z is a P-by-P unitary */
 /* > matrix, and R and T assume one of the forms: */
 /* > */
 /* > if M <= N,  R = ( 0  R12 ) M,   or if M > N,  R = ( R11 ) M-N, */
 /* >                  N-M  M                           ( R21 ) N */
 /* >                                                      N */
 /* > */
 /* > where R12 or R21 is upper triangular, and */
 /* > */
 /* > if P >= N,  T = ( T11 ) N  ,   or if P < N,  T = ( T11  T12 ) P, */
 /* >                 (  0  ) P-N                         P   N-P */
 /* >                    N */
 /* > */
 /* > where T11 is upper triangular. */
 /* > */
 /* > In particular, if B is square and nonsingular, the GRQ factorization */
 /* > of A and B implicitly gives the RQ factorization of A*inv(B): */
 /* > */
 /* >              A*inv(B) = (R*inv(T))*Z**H */
 /* > */
 /* > where inv(B) denotes the inverse of the matrix B, and Z**H denotes the */
 /* > conjugate transpose of the matrix Z. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] P */
 /* > \verbatim */
 /* >          P is INTEGER */
 /* >          The number of rows of the matrix B.  P >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrices A and B. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, if M <= N, the upper triangle of the subarray */
 /* >          A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; */
 /* >          if M > N, the elements on and above the (M-N)-th subdiagonal */
 /* >          contain the M-by-N upper trapezoidal matrix R; the remaining */
 /* >          elements, with the array TAUA, represent the unitary */
 /* >          matrix Q as a product of elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUA */
 /* > \verbatim */
 /* >          TAUA is COMPLEX array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the unitary matrix Q (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,N) */
 /* >          On entry, the P-by-N matrix B. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the f2cmin(P,N)-by-N upper trapezoidal matrix T (T is */
 /* >          upper triangular if P >= N); the elements below the diagonal, */
 /* >          with the array TAUB, represent the unitary matrix Z as a */
 /* >          product of elementary reflectors (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= f2cmax(1,P). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUB */
 /* > \verbatim */
 /* >          TAUB is COMPLEX array, dimension (f2cmin(P,N)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the unitary matrix Z (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. LWORK >= f2cmax(1,N,M,P). */
 /* >          For optimum performance LWORK >= f2cmax(N,M,P)*f2cmax(NB1,NB2,NB3), */
 /* >          where NB1 is the optimal blocksize for the RQ factorization */
 /* >          of an M-by-N matrix, NB2 is the optimal blocksize for the */
 /* >          QR factorization of a P-by-N matrix, and NB3 is the optimal */
 /* >          blocksize for a call of CUNMRQ. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO=-i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexOTHERcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - taua * v * v**H */
 /* > */
 /* >  where taua is a complex scalar, and v is a complex vector with */
 /* >  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */
 /* >  A(m-k+i,1:n-k+i-1), and taua in TAUA(i). */
 /* >  To form Q explicitly, use LAPACK subroutine CUNGRQ. */
 /* >  To use Q to update another matrix, use LAPACK subroutine CUNMRQ. */
 /* > */
 /* >  The matrix Z is represented as a product of elementary reflectors */
 /* > */
 /* >     Z = H(1) H(2) . . . H(k), where k = f2cmin(p,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - taub * v * v**H */
 /* > */
 /* >  where taub is a complex scalar, and v is a complex vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), */
 /* >  and taub in TAUB(i). */
 /* >  To form Z explicitly, use LAPACK subroutine CUNGQR. */
 /* >  To use Z to update another matrix, use LAPACK subroutine CUNMQR. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int cggrqf_(integer *m, integer *p, integer *n, complex *a, 
 	integer *lda, complex *taua, complex *b, integer *ldb, complex *taub, 
 	complex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;

    /* Local variables */
    integer lopt, nb;
    extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *, 
 	    integer *, complex *, complex *, integer *, integer *), cgerqf_(
 	    integer *, integer *, complex *, integer *, complex *, complex *, 
 	    integer *, integer *), xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer nb1, nb2, nb3;
    extern /* Subroutine */ int cunmrq_(char *, char *, integer *, integer *, 
 	    integer *, complex *, integer *, complex *, complex *, integer *, 
 	    complex *, integer *, integer *);
    integer lwkopt;
    logical lquery;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --taua;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --taub;
    --work;

    /* Function Body */
    *info = 0;
    nb1 = ilaenv_(&c__1, "CGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);
    nb2 = ilaenv_(&c__1, "CGEQRF", " ", p, n, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);
    nb3 = ilaenv_(&c__1, "CUNMRQ", " ", m, n, p, &c_n1, (ftnlen)6, (ftnlen)1);
 /* Computing MAX */
    i__1 = f2cmax(nb1,nb2);
    nb = f2cmax(i__1,nb3);
 /* Computing MAX */
    i__1 = f2cmax(*n,*m);
    lwkopt = f2cmax(i__1,*p) * nb;
    work[1].r = (real) lwkopt, work[1].i = 0.f;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*p < 0) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*p)) {
 	*info = -8;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = f2cmax(1,*m), i__1 = f2cmax(i__1,*p);
 	if (*lwork < f2cmax(i__1,*n) && ! lquery) {
 	    *info = -11;
 	}
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CGGRQF", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     RQ factorization of M-by-N matrix A: A = R*Q */

    cgerqf_(m, n, &a[a_offset], lda, &taua[1], &work[1], lwork, info);
    lopt = work[1].r;

 /*     Update B := B*Q**H */

    i__1 = f2cmin(*m,*n);
 /* Computing MAX */
    i__2 = 1, i__3 = *m - *n + 1;
    cunmrq_("Right", "Conjugate Transpose", p, n, &i__1, &a[f2cmax(i__2,i__3) + 
 	    a_dim1], lda, &taua[1], &b[b_offset], ldb, &work[1], lwork, info);
 /* Computing MAX */
    i__1 = lopt, i__2 = (integer) work[1].r;
    lopt = f2cmax(i__1,i__2);

 /*     QR factorization of P-by-N matrix B: B = Z*T */

    cgeqrf_(p, n, &b[b_offset], ldb, &taub[1], &work[1], lwork, info);
 /* Computing MAX */
    i__2 = lopt, i__3 = (integer) work[1].r;
    i__1 = f2cmax(i__2,i__3);
    work[1].r = (real) i__1, work[1].i = 0.f;

    return 0;

 /*     End of CGGRQF */

 } /* cggrqf_ */

--- a/lapack-netlib/SRC/cggsvd3.c
+++ b/lapack-netlib/SRC/cggsvd3.c
@@ -0,0 +1,946 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c_n1 = -1;
 static integer c__1 = 1;

 /* > \brief <b> CGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CGGSVD3 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cggsvd3
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cggsvd3
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cggsvd3
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CGGSVD3( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, */
 /*                           LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, */
 /*                           LWORK, RWORK, IWORK, INFO ) */

 /*       CHARACTER          JOBQ, JOBU, JOBV */
 /*       INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK */
 /*       INTEGER            IWORK( * ) */
 /*       REAL               ALPHA( * ), BETA( * ), RWORK( * ) */
 /*       COMPLEX            A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */
 /*      $                   U( LDU, * ), V( LDV, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CGGSVD3 computes the generalized singular value decomposition (GSVD) */
 /* > of an M-by-N complex matrix A and P-by-N complex matrix B: */
 /* > */
 /* >       U**H*A*Q = D1*( 0 R ),    V**H*B*Q = D2*( 0 R ) */
 /* > */
 /* > where U, V and Q are unitary matrices. */
 /* > Let K+L = the effective numerical rank of the */
 /* > matrix (A**H,B**H)**H, then R is a (K+L)-by-(K+L) nonsingular upper */
 /* > triangular matrix, D1 and D2 are M-by-(K+L) and P-by-(K+L) "diagonal" */
 /* > matrices and of the following structures, respectively: */
 /* > */
 /* > If M-K-L >= 0, */
 /* > */
 /* >                     K  L */
 /* >        D1 =     K ( I  0 ) */
 /* >                 L ( 0  C ) */
 /* >             M-K-L ( 0  0 ) */
 /* > */
 /* >                   K  L */
 /* >        D2 =   L ( 0  S ) */
 /* >             P-L ( 0  0 ) */
 /* > */
 /* >                 N-K-L  K    L */
 /* >   ( 0 R ) = K (  0   R11  R12 ) */
 /* >             L (  0    0   R22 ) */
 /* > */
 /* > where */
 /* > */
 /* >   C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), */
 /* >   S = diag( BETA(K+1),  ... , BETA(K+L) ), */
 /* >   C**2 + S**2 = I. */
 /* > */
 /* >   R is stored in A(1:K+L,N-K-L+1:N) on exit. */
 /* > */
 /* > If M-K-L < 0, */
 /* > */
 /* >                   K M-K K+L-M */
 /* >        D1 =   K ( I  0    0   ) */
 /* >             M-K ( 0  C    0   ) */
 /* > */
 /* >                     K M-K K+L-M */
 /* >        D2 =   M-K ( 0  S    0  ) */
 /* >             K+L-M ( 0  0    I  ) */
 /* >               P-L ( 0  0    0  ) */
 /* > */
 /* >                    N-K-L  K   M-K  K+L-M */
 /* >   ( 0 R ) =     K ( 0    R11  R12  R13  ) */
 /* >               M-K ( 0     0   R22  R23  ) */
 /* >             K+L-M ( 0     0    0   R33  ) */
 /* > */
 /* > where */
 /* > */
 /* >   C = diag( ALPHA(K+1), ... , ALPHA(M) ), */
 /* >   S = diag( BETA(K+1),  ... , BETA(M) ), */
 /* >   C**2 + S**2 = I. */
 /* > */
 /* >   (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored */
 /* >   ( 0  R22 R23 ) */
 /* >   in B(M-K+1:L,N+M-K-L+1:N) on exit. */
 /* > */
 /* > The routine computes C, S, R, and optionally the unitary */
 /* > transformation matrices U, V and Q. */
 /* > */
 /* > In particular, if B is an N-by-N nonsingular matrix, then the GSVD of */
 /* > A and B implicitly gives the SVD of A*inv(B): */
 /* >                      A*inv(B) = U*(D1*inv(D2))*V**H. */
 /* > If ( A**H,B**H)**H has orthonormal columns, then the GSVD of A and B is also */
 /* > equal to the CS decomposition of A and B. Furthermore, the GSVD can */
 /* > be used to derive the solution of the eigenvalue problem: */
 /* >                      A**H*A x = lambda* B**H*B x. */
 /* > In some literature, the GSVD of A and B is presented in the form */
 /* >                  U**H*A*X = ( 0 D1 ),   V**H*B*X = ( 0 D2 ) */
 /* > where U and V are orthogonal and X is nonsingular, and D1 and D2 are */
 /* > ``diagonal''.  The former GSVD form can be converted to the latter */
 /* > form by taking the nonsingular matrix X as */
 /* > */
 /* >                       X = Q*(  I   0    ) */
 /* >                             (  0 inv(R) ) */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBU */
 /* > \verbatim */
 /* >          JOBU is CHARACTER*1 */
 /* >          = 'U':  Unitary matrix U is computed; */
 /* >          = 'N':  U is not computed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JOBV */
 /* > \verbatim */
 /* >          JOBV is CHARACTER*1 */
 /* >          = 'V':  Unitary matrix V is computed; */
 /* >          = 'N':  V is not computed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JOBQ */
 /* > \verbatim */
 /* >          JOBQ is CHARACTER*1 */
 /* >          = 'Q':  Unitary matrix Q is computed; */
 /* >          = 'N':  Q is not computed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] P */
 /* > \verbatim */
 /* >          P is INTEGER */
 /* >          The number of rows of the matrix B.  P >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] K */
 /* > \verbatim */
 /* >          K is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[out] L */
 /* > \verbatim */
 /* >          L is INTEGER */
 /* > */
 /* >          On exit, K and L specify the dimension of the subblocks */
 /* >          described in Purpose. */
 /* >          K + L = effective numerical rank of (A**H,B**H)**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, A contains the triangular matrix R, or part of R. */
 /* >          See Purpose for details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,N) */
 /* >          On entry, the P-by-N matrix B. */
 /* >          On exit, B contains part of the triangular matrix R if */
 /* >          M-K-L < 0.  See Purpose for details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= f2cmax(1,P). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] ALPHA */
 /* > \verbatim */
 /* >          ALPHA is REAL array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] BETA */
 /* > \verbatim */
 /* >          BETA is REAL array, dimension (N) */
 /* > */
 /* >          On exit, ALPHA and BETA contain the generalized singular */
 /* >          value pairs of A and B; */
 /* >            ALPHA(1:K) = 1, */
 /* >            BETA(1:K)  = 0, */
 /* >          and if M-K-L >= 0, */
 /* >            ALPHA(K+1:K+L) = C, */
 /* >            BETA(K+1:K+L)  = S, */
 /* >          or if M-K-L < 0, */
 /* >            ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 */
 /* >            BETA(K+1:M) =S, BETA(M+1:K+L) =1 */
 /* >          and */
 /* >            ALPHA(K+L+1:N) = 0 */
 /* >            BETA(K+L+1:N)  = 0 */
 /* > \endverbatim */
 /* > */
 /* > \param[out] U */
 /* > \verbatim */
 /* >          U is COMPLEX array, dimension (LDU,M) */
 /* >          If JOBU = 'U', U contains the M-by-M unitary matrix U. */
 /* >          If JOBU = 'N', U is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDU */
 /* > \verbatim */
 /* >          LDU is INTEGER */
 /* >          The leading dimension of the array U. LDU >= f2cmax(1,M) if */
 /* >          JOBU = 'U'; LDU >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] V */
 /* > \verbatim */
 /* >          V is COMPLEX array, dimension (LDV,P) */
 /* >          If JOBV = 'V', V contains the P-by-P unitary matrix V. */
 /* >          If JOBV = 'N', V is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDV */
 /* > \verbatim */
 /* >          LDV is INTEGER */
 /* >          The leading dimension of the array V. LDV >= f2cmax(1,P) if */
 /* >          JOBV = 'V'; LDV >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Q */
 /* > \verbatim */
 /* >          Q is COMPLEX array, dimension (LDQ,N) */
 /* >          If JOBQ = 'Q', Q contains the N-by-N unitary matrix Q. */
 /* >          If JOBQ = 'N', Q is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDQ */
 /* > \verbatim */
 /* >          LDQ is INTEGER */
 /* >          The leading dimension of the array Q. LDQ >= f2cmax(1,N) if */
 /* >          JOBQ = 'Q'; LDQ >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (N) */
 /* >          On exit, IWORK stores the sorting information. More */
 /* >          precisely, the following loop will sort ALPHA */
 /* >             for I = K+1, f2cmin(M,K+L) */
 /* >                 swap ALPHA(I) and ALPHA(IWORK(I)) */
 /* >             endfor */
 /* >          such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          > 0:  if INFO = 1, the Jacobi-type procedure failed to */
 /* >                converge.  For further details, see subroutine CTGSJA. */
 /* > \endverbatim */

 /* > \par Internal Parameters: */
 /*  ========================= */
 /* > */
 /* > \verbatim */
 /* >  TOLA    REAL */
 /* >  TOLB    REAL */
 /* >          TOLA and TOLB are the thresholds to determine the effective */
 /* >          rank of (A**H,B**H)**H. Generally, they are set to */
 /* >                   TOLA = MAX(M,N)*norm(A)*MACHEPS, */
 /* >                   TOLB = MAX(P,N)*norm(B)*MACHEPS. */
 /* >          The size of TOLA and TOLB may affect the size of backward */
 /* >          errors of the decomposition. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date August 2015 */

 /* > \ingroup complexGEsing */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Ming Gu and Huan Ren, Computer Science Division, University of */
 /* >     California at Berkeley, USA */
 /* > */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* >  CGGSVD3 replaces the deprecated subroutine CGGSVD. */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int cggsvd3_(char *jobu, char *jobv, char *jobq, integer *m, 
 	integer *n, integer *p, integer *k, integer *l, complex *a, integer *
 	lda, complex *b, integer *ldb, real *alpha, real *beta, complex *u, 
 	integer *ldu, complex *v, integer *ldv, complex *q, integer *ldq, 
 	complex *work, integer *lwork, real *rwork, integer *iwork, integer *
 	info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, 
 	    u_offset, v_dim1, v_offset, i__1, i__2;
    complex q__1;

    /* Local variables */
    integer ibnd;
    real tola;
    integer isub;
    real tolb, unfl, temp, smax;
    integer ncallmycycle, i__, j;
    extern logical lsame_(char *, char *);
    real anorm, bnorm;
    logical wantq;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
 	    integer *);
    logical wantu, wantv;
    extern real clange_(char *, integer *, integer *, complex *, integer *, 
 	    real *), slamch_(char *);
    extern /* Subroutine */ int ctgsja_(char *, char *, char *, integer *, 
 	    integer *, integer *, integer *, integer *, complex *, integer *, 
 	    complex *, integer *, real *, real *, real *, real *, complex *, 
 	    integer *, complex *, integer *, complex *, integer *, complex *, 
 	    integer *, integer *), xerbla_(char *, 
 	    integer *, ftnlen);
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int cggsvp3_(char *, char *, char *, integer *, 
 	    integer *, integer *, complex *, integer *, complex *, integer *, 
 	    real *, real *, integer *, integer *, complex *, integer *, 
 	    complex *, integer *, complex *, integer *, integer *, real *, 
 	    complex *, complex *, integer *, integer *);
    real ulp;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     August 2015 */


 /*  ===================================================================== */


 /*     Decode and test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --alpha;
    --beta;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1 * 1;
    v -= v_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    wantu = lsame_(jobu, "U");
    wantv = lsame_(jobv, "V");
    wantq = lsame_(jobq, "Q");
    lquery = *lwork == -1;
    lwkopt = 1;

 /*     Test the input arguments */

    *info = 0;
    if (! (wantu || lsame_(jobu, "N"))) {
 	*info = -1;
    } else if (! (wantv || lsame_(jobv, "N"))) {
 	*info = -2;
    } else if (! (wantq || lsame_(jobq, "N"))) {
 	*info = -3;
    } else if (*m < 0) {
 	*info = -4;
    } else if (*n < 0) {
 	*info = -5;
    } else if (*p < 0) {
 	*info = -6;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -10;
    } else if (*ldb < f2cmax(1,*p)) {
 	*info = -12;
    } else if (*ldu < 1 || wantu && *ldu < *m) {
 	*info = -16;
    } else if (*ldv < 1 || wantv && *ldv < *p) {
 	*info = -18;
    } else if (*ldq < 1 || wantq && *ldq < *n) {
 	*info = -20;
    } else if (*lwork < 1 && ! lquery) {
 	*info = -24;
    }

 /*     Compute workspace */

    if (*info == 0) {
 	cggsvp3_(jobu, jobv, jobq, m, p, n, &a[a_offset], lda, &b[b_offset], 
 		ldb, &tola, &tolb, k, l, &u[u_offset], ldu, &v[v_offset], ldv,
 		 &q[q_offset], ldq, &iwork[1], &rwork[1], &work[1], &work[1], 
 		&c_n1, info);
 	lwkopt = *n + (integer) work[1].r;
 /* Computing MAX */
 	i__1 = *n << 1;
 	lwkopt = f2cmax(i__1,lwkopt);
 	lwkopt = f2cmax(1,lwkopt);
 	q__1.r = (real) lwkopt, q__1.i = 0.f;
 	work[1].r = q__1.r, work[1].i = q__1.i;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CGGSVD3", &i__1, (ftnlen)7);
 	return 0;
    }
    if (lquery) {
 	return 0;
    }

 /*     Compute the Frobenius norm of matrices A and B */

    anorm = clange_("1", m, n, &a[a_offset], lda, &rwork[1]);
    bnorm = clange_("1", p, n, &b[b_offset], ldb, &rwork[1]);

 /*     Get machine precision and set up threshold for determining */
 /*     the effective numerical rank of the matrices A and B. */

    ulp = slamch_("Precision");
    unfl = slamch_("Safe Minimum");
    tola = f2cmax(*m,*n) * f2cmax(anorm,unfl) * ulp;
    tolb = f2cmax(*p,*n) * f2cmax(bnorm,unfl) * ulp;

    i__1 = *lwork - *n;
    cggsvp3_(jobu, jobv, jobq, m, p, n, &a[a_offset], lda, &b[b_offset], ldb, 
 	    &tola, &tolb, k, l, &u[u_offset], ldu, &v[v_offset], ldv, &q[
 	    q_offset], ldq, &iwork[1], &rwork[1], &work[1], &work[*n + 1], &
 	    i__1, info);

 /*     Compute the GSVD of two upper "triangular" matrices */

    ctgsja_(jobu, jobv, jobq, m, p, n, k, l, &a[a_offset], lda, &b[b_offset], 
 	    ldb, &tola, &tolb, &alpha[1], &beta[1], &u[u_offset], ldu, &v[
 	    v_offset], ldv, &q[q_offset], ldq, &work[1], &ncallmycycle, info);

 /*     Sort the singular values and store the pivot indices in IWORK */
 /*     Copy ALPHA to RWORK, then sort ALPHA in RWORK */

    scopy_(n, &alpha[1], &c__1, &rwork[1], &c__1);
 /* Computing MIN */
    i__1 = *l, i__2 = *m - *k;
    ibnd = f2cmin(i__1,i__2);
    i__1 = ibnd;
    for (i__ = 1; i__ <= i__1; ++i__) {

 /*        Scan for largest ALPHA(K+I) */

 	isub = i__;
 	smax = rwork[*k + i__];
 	i__2 = ibnd;
 	for (j = i__ + 1; j <= i__2; ++j) {
 	    temp = rwork[*k + j];
 	    if (temp > smax) {
 		isub = j;
 		smax = temp;
 	    }
 /* L10: */
 	}
 	if (isub != i__) {
 	    rwork[*k + isub] = rwork[*k + i__];
 	    rwork[*k + i__] = smax;
 	    iwork[*k + i__] = *k + isub;
 	} else {
 	    iwork[*k + i__] = *k + i__;
 	}
 /* L20: */
    }

    q__1.r = (real) lwkopt, q__1.i = 0.f;
    work[1].r = q__1.r, work[1].i = q__1.i;
    return 0;

 /*     End of CGGSVD3 */

 } /* cggsvd3_ */

--- a/lapack-netlib/SRC/cggsvp3.c
+++ b/lapack-netlib/SRC/cggsvp3.c
--- a/lapack-netlib/SRC/cgsvj0.c
+++ b/lapack-netlib/SRC/cgsvj0.c
--- a/lapack-netlib/SRC/cgsvj1.c
+++ b/lapack-netlib/SRC/cgsvj1.c
--- a/lapack-netlib/SRC/cgtcon.c
+++ b/lapack-netlib/SRC/cgtcon.c
@@ -0,0 +1,645 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b CGTCON */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CGTCON + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgtcon.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgtcon.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgtcon.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, */
 /*                          WORK, INFO ) */

 /*       CHARACTER          NORM */
 /*       INTEGER            INFO, N */
 /*       REAL               ANORM, RCOND */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX            D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CGTCON estimates the reciprocal of the condition number of a complex */
 /* > tridiagonal matrix A using the LU factorization as computed by */
 /* > CGTTRF. */
 /* > */
 /* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */
 /* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] NORM */
 /* > \verbatim */
 /* >          NORM is CHARACTER*1 */
 /* >          Specifies whether the 1-norm condition number or the */
 /* >          infinity-norm condition number is required: */
 /* >          = '1' or 'O':  1-norm; */
 /* >          = 'I':         Infinity-norm. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DL */
 /* > \verbatim */
 /* >          DL is COMPLEX array, dimension (N-1) */
 /* >          The (n-1) multipliers that define the matrix L from the */
 /* >          LU factorization of A as computed by CGTTRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] D */
 /* > \verbatim */
 /* >          D is COMPLEX array, dimension (N) */
 /* >          The n diagonal elements of the upper triangular matrix U from */
 /* >          the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU */
 /* > \verbatim */
 /* >          DU is COMPLEX array, dimension (N-1) */
 /* >          The (n-1) elements of the first superdiagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU2 */
 /* > \verbatim */
 /* >          DU2 is COMPLEX array, dimension (N-2) */
 /* >          The (n-2) elements of the second superdiagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices; for 1 <= i <= n, row i of the matrix was */
 /* >          interchanged with row IPIV(i).  IPIV(i) will always be either */
 /* >          i or i+1; IPIV(i) = i indicates a row interchange was not */
 /* >          required. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ANORM */
 /* > \verbatim */
 /* >          ANORM is REAL */
 /* >          If NORM = '1' or 'O', the 1-norm of the original matrix A. */
 /* >          If NORM = 'I', the infinity-norm of the original matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is REAL */
 /* >          The reciprocal of the condition number of the matrix A, */
 /* >          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */
 /* >          estimate of the 1-norm of inv(A) computed in this routine. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexGTcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int cgtcon_(char *norm, integer *n, complex *dl, complex *
 	d__, complex *du, complex *du2, integer *ipiv, real *anorm, real *
 	rcond, complex *work, integer *info)
 {
    /* System generated locals */
    integer i__1, i__2;

    /* Local variables */
    integer kase, kase1, i__;
    extern logical lsame_(char *, char *);
    integer isave[3];
    extern /* Subroutine */ int clacn2_(integer *, complex *, complex *, real 
 	    *, integer *, integer *), xerbla_(char *, integer *, ftnlen);
    real ainvnm;
    logical onenrm;
    extern /* Subroutine */ int cgttrs_(char *, integer *, integer *, complex 
 	    *, complex *, complex *, complex *, integer *, complex *, integer 
 	    *, integer *);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input arguments. */

    /* Parameter adjustments */
    --work;
    --ipiv;
    --du2;
    --du;
    --d__;
    --dl;

    /* Function Body */
    *info = 0;
    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
    if (! onenrm && ! lsame_(norm, "I")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*anorm < 0.f) {
 	*info = -8;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CGTCON", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    *rcond = 0.f;
    if (*n == 0) {
 	*rcond = 1.f;
 	return 0;
    } else if (*anorm == 0.f) {
 	return 0;
    }

 /*     Check that D(1:N) is non-zero. */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	i__2 = i__;
 	if (d__[i__2].r == 0.f && d__[i__2].i == 0.f) {
 	    return 0;
 	}
 /* L10: */
    }

    ainvnm = 0.f;
    if (onenrm) {
 	kase1 = 1;
    } else {
 	kase1 = 2;
    }
    kase = 0;
 L20:
    clacn2_(n, &work[*n + 1], &work[1], &ainvnm, &kase, isave);
    if (kase != 0) {
 	if (kase == kase1) {

 /*           Multiply by inv(U)*inv(L). */

 	    cgttrs_("No transpose", n, &c__1, &dl[1], &d__[1], &du[1], &du2[1]
 		    , &ipiv[1], &work[1], n, info);
 	} else {

 /*           Multiply by inv(L**H)*inv(U**H). */

 	    cgttrs_("Conjugate transpose", n, &c__1, &dl[1], &d__[1], &du[1], 
 		    &du2[1], &ipiv[1], &work[1], n, info);
 	}
 	goto L20;
    }

 /*     Compute the estimate of the reciprocal condition number. */

    if (ainvnm != 0.f) {
 	*rcond = 1.f / ainvnm / *anorm;
    }

    return 0;

 /*     End of CGTCON */

 } /* cgtcon_ */

--- a/lapack-netlib/SRC/cgtrfs.c
+++ b/lapack-netlib/SRC/cgtrfs.c
--- a/lapack-netlib/SRC/cgtsv.c
+++ b/lapack-netlib/SRC/cgtsv.c
@@ -0,0 +1,710 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief <b> CGTSV computes the solution to system of linear equations A * X = B for GT matrices </b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CGTSV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgtsv.f
 "> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgtsv.f
 "> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgtsv.f
 "> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) */

 /*       INTEGER            INFO, LDB, N, NRHS */
 /*       COMPLEX            B( LDB, * ), D( * ), DL( * ), DU( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CGTSV  solves the equation */
 /* > */
 /* >    A*X = B, */
 /* > */
 /* > where A is an N-by-N tridiagonal matrix, by Gaussian elimination with */
 /* > partial pivoting. */
 /* > */
 /* > Note that the equation  A**T *X = B  may be solved by interchanging the */
 /* > order of the arguments DU and DL. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DL */
 /* > \verbatim */
 /* >          DL is COMPLEX array, dimension (N-1) */
 /* >          On entry, DL must contain the (n-1) subdiagonal elements of */
 /* >          A. */
 /* >          On exit, DL is overwritten by the (n-2) elements of the */
 /* >          second superdiagonal of the upper triangular matrix U from */
 /* >          the LU factorization of A, in DL(1), ..., DL(n-2). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] D */
 /* > \verbatim */
 /* >          D is COMPLEX array, dimension (N) */
 /* >          On entry, D must contain the diagonal elements of A. */
 /* >          On exit, D is overwritten by the n diagonal elements of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DU */
 /* > \verbatim */
 /* >          DU is COMPLEX array, dimension (N-1) */
 /* >          On entry, DU must contain the (n-1) superdiagonal elements */
 /* >          of A. */
 /* >          On exit, DU is overwritten by the (n-1) elements of the first */
 /* >          superdiagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,NRHS) */
 /* >          On entry, the N-by-NRHS right hand side matrix B. */
 /* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, U(i,i) is exactly zero, and the solution */
 /* >                has not been computed.  The factorization has not been */
 /* >                completed unless i = N. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexGTsolve */

 /*  ===================================================================== */
 /* Subroutine */ int cgtsv_(integer *n, integer *nrhs, complex *dl, complex *
 	d__, complex *du, complex *b, integer *ldb, integer *info)
 {
    /* System generated locals */
    integer b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
    real r__1, r__2, r__3, r__4;
    complex q__1, q__2, q__3, q__4, q__5;

    /* Local variables */
    complex temp, mult;
    integer j, k;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


    /* Parameter adjustments */
    --dl;
    --d__;
    --du;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    if (*n < 0) {
 	*info = -1;
    } else if (*nrhs < 0) {
 	*info = -2;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CGTSV ", &i__1, (ftnlen)6);
 	return 0;
    }

    if (*n == 0) {
 	return 0;
    }

    i__1 = *n - 1;
    for (k = 1; k <= i__1; ++k) {
 	i__2 = k;
 	if (dl[i__2].r == 0.f && dl[i__2].i == 0.f) {

 /*           Subdiagonal is zero, no elimination is required. */

 	    i__2 = k;
 	    if (d__[i__2].r == 0.f && d__[i__2].i == 0.f) {

 /*              Diagonal is zero: set INFO = K and return; a unique */
 /*              solution can not be found. */

 		*info = k;
 		return 0;
 	    }
 	} else /* if(complicated condition) */ {
 	    i__2 = k;
 	    i__3 = k;
 	    if ((r__1 = d__[i__2].r, abs(r__1)) + (r__2 = r_imag(&d__[k]), 
 		    abs(r__2)) >= (r__3 = dl[i__3].r, abs(r__3)) + (r__4 = 
 		    r_imag(&dl[k]), abs(r__4))) {

 /*           No row interchange required */

 		c_div(&q__1, &dl[k], &d__[k]);
 		mult.r = q__1.r, mult.i = q__1.i;
 		i__2 = k + 1;
 		i__3 = k + 1;
 		i__4 = k;
 		q__2.r = mult.r * du[i__4].r - mult.i * du[i__4].i, q__2.i = 
 			mult.r * du[i__4].i + mult.i * du[i__4].r;
 		q__1.r = d__[i__3].r - q__2.r, q__1.i = d__[i__3].i - q__2.i;
 		d__[i__2].r = q__1.r, d__[i__2].i = q__1.i;
 		i__2 = *nrhs;
 		for (j = 1; j <= i__2; ++j) {
 		    i__3 = k + 1 + j * b_dim1;
 		    i__4 = k + 1 + j * b_dim1;
 		    i__5 = k + j * b_dim1;
 		    q__2.r = mult.r * b[i__5].r - mult.i * b[i__5].i, q__2.i =
 			     mult.r * b[i__5].i + mult.i * b[i__5].r;
 		    q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4].i - q__2.i;
 		    b[i__3].r = q__1.r, b[i__3].i = q__1.i;
 /* L10: */
 		}
 		if (k < *n - 1) {
 		    i__2 = k;
 		    dl[i__2].r = 0.f, dl[i__2].i = 0.f;
 		}
 	    } else {

 /*           Interchange rows K and K+1 */

 		c_div(&q__1, &d__[k], &dl[k]);
 		mult.r = q__1.r, mult.i = q__1.i;
 		i__2 = k;
 		i__3 = k;
 		d__[i__2].r = dl[i__3].r, d__[i__2].i = dl[i__3].i;
 		i__2 = k + 1;
 		temp.r = d__[i__2].r, temp.i = d__[i__2].i;
 		i__2 = k + 1;
 		i__3 = k;
 		q__2.r = mult.r * temp.r - mult.i * temp.i, q__2.i = mult.r * 
 			temp.i + mult.i * temp.r;
 		q__1.r = du[i__3].r - q__2.r, q__1.i = du[i__3].i - q__2.i;
 		d__[i__2].r = q__1.r, d__[i__2].i = q__1.i;
 		if (k < *n - 1) {
 		    i__2 = k;
 		    i__3 = k + 1;
 		    dl[i__2].r = du[i__3].r, dl[i__2].i = du[i__3].i;
 		    i__2 = k + 1;
 		    q__2.r = -mult.r, q__2.i = -mult.i;
 		    i__3 = k;
 		    q__1.r = q__2.r * dl[i__3].r - q__2.i * dl[i__3].i, 
 			    q__1.i = q__2.r * dl[i__3].i + q__2.i * dl[i__3]
 			    .r;
 		    du[i__2].r = q__1.r, du[i__2].i = q__1.i;
 		}
 		i__2 = k;
 		du[i__2].r = temp.r, du[i__2].i = temp.i;
 		i__2 = *nrhs;
 		for (j = 1; j <= i__2; ++j) {
 		    i__3 = k + j * b_dim1;
 		    temp.r = b[i__3].r, temp.i = b[i__3].i;
 		    i__3 = k + j * b_dim1;
 		    i__4 = k + 1 + j * b_dim1;
 		    b[i__3].r = b[i__4].r, b[i__3].i = b[i__4].i;
 		    i__3 = k + 1 + j * b_dim1;
 		    i__4 = k + 1 + j * b_dim1;
 		    q__2.r = mult.r * b[i__4].r - mult.i * b[i__4].i, q__2.i =
 			     mult.r * b[i__4].i + mult.i * b[i__4].r;
 		    q__1.r = temp.r - q__2.r, q__1.i = temp.i - q__2.i;
 		    b[i__3].r = q__1.r, b[i__3].i = q__1.i;
 /* L20: */
 		}
 	    }
 	}
 /* L30: */
    }
    i__1 = *n;
    if (d__[i__1].r == 0.f && d__[i__1].i == 0.f) {
 	*info = *n;
 	return 0;
    }

 /*     Back solve with the matrix U from the factorization. */

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
 	i__2 = *n + j * b_dim1;
 	c_div(&q__1, &b[*n + j * b_dim1], &d__[*n]);
 	b[i__2].r = q__1.r, b[i__2].i = q__1.i;
 	if (*n > 1) {
 	    i__2 = *n - 1 + j * b_dim1;
 	    i__3 = *n - 1 + j * b_dim1;
 	    i__4 = *n - 1;
 	    i__5 = *n + j * b_dim1;
 	    q__3.r = du[i__4].r * b[i__5].r - du[i__4].i * b[i__5].i, q__3.i =
 		     du[i__4].r * b[i__5].i + du[i__4].i * b[i__5].r;
 	    q__2.r = b[i__3].r - q__3.r, q__2.i = b[i__3].i - q__3.i;
 	    c_div(&q__1, &q__2, &d__[*n - 1]);
 	    b[i__2].r = q__1.r, b[i__2].i = q__1.i;
 	}
 	for (k = *n - 2; k >= 1; --k) {
 	    i__2 = k + j * b_dim1;
 	    i__3 = k + j * b_dim1;
 	    i__4 = k;
 	    i__5 = k + 1 + j * b_dim1;
 	    q__4.r = du[i__4].r * b[i__5].r - du[i__4].i * b[i__5].i, q__4.i =
 		     du[i__4].r * b[i__5].i + du[i__4].i * b[i__5].r;
 	    q__3.r = b[i__3].r - q__4.r, q__3.i = b[i__3].i - q__4.i;
 	    i__6 = k;
 	    i__7 = k + 2 + j * b_dim1;
 	    q__5.r = dl[i__6].r * b[i__7].r - dl[i__6].i * b[i__7].i, q__5.i =
 		     dl[i__6].r * b[i__7].i + dl[i__6].i * b[i__7].r;
 	    q__2.r = q__3.r - q__5.r, q__2.i = q__3.i - q__5.i;
 	    c_div(&q__1, &q__2, &d__[k]);
 	    b[i__2].r = q__1.r, b[i__2].i = q__1.i;
 /* L40: */
 	}
 /* L50: */
    }

    return 0;

 /*     End of CGTSV */

 } /* cgtsv_ */

--- a/lapack-netlib/SRC/cgtsvx.c
+++ b/lapack-netlib/SRC/cgtsvx.c
@@ -0,0 +1,829 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief <b> CGTSVX computes the solution to system of linear equations A * X = B for GT matrices </b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CGTSVX + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgtsvx.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgtsvx.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgtsvx.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, */
 /*                          DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, */
 /*                          WORK, RWORK, INFO ) */

 /*       CHARACTER          FACT, TRANS */
 /*       INTEGER            INFO, LDB, LDX, N, NRHS */
 /*       REAL               RCOND */
 /*       INTEGER            IPIV( * ) */
 /*       REAL               BERR( * ), FERR( * ), RWORK( * ) */
 /*       COMPLEX            B( LDB, * ), D( * ), DF( * ), DL( * ), */
 /*      $                   DLF( * ), DU( * ), DU2( * ), DUF( * ), */
 /*      $                   WORK( * ), X( LDX, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CGTSVX uses the LU factorization to compute the solution to a complex */
 /* > system of linear equations A * X = B, A**T * X = B, or A**H * X = B, */
 /* > where A is a tridiagonal matrix of order N and X and B are N-by-NRHS */
 /* > matrices. */
 /* > */
 /* > Error bounds on the solution and a condition estimate are also */
 /* > provided. */
 /* > \endverbatim */

 /* > \par Description: */
 /*  ================= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > The following steps are performed: */
 /* > */
 /* > 1. If FACT = 'N', the LU decomposition is used to factor the matrix A */
 /* >    as A = L * U, where L is a product of permutation and unit lower */
 /* >    bidiagonal matrices and U is upper triangular with nonzeros in */
 /* >    only the main diagonal and first two superdiagonals. */
 /* > */
 /* > 2. If some U(i,i)=0, so that U is exactly singular, then the routine */
 /* >    returns with INFO = i. Otherwise, the factored form of A is used */
 /* >    to estimate the condition number of the matrix A.  If the */
 /* >    reciprocal of the condition number is less than machine precision, */
 /* >    INFO = N+1 is returned as a warning, but the routine still goes on */
 /* >    to solve for X and compute error bounds as described below. */
 /* > */
 /* > 3. The system of equations is solved for X using the factored form */
 /* >    of A. */
 /* > */
 /* > 4. Iterative refinement is applied to improve the computed solution */
 /* >    matrix and calculate error bounds and backward error estimates */
 /* >    for it. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] FACT */
 /* > \verbatim */
 /* >          FACT is CHARACTER*1 */
 /* >          Specifies whether or not the factored form of A has been */
 /* >          supplied on entry. */
 /* >          = 'F':  DLF, DF, DUF, DU2, and IPIV contain the factored form */
 /* >                  of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV will not */
 /* >                  be modified. */
 /* >          = 'N':  The matrix will be copied to DLF, DF, and DUF */
 /* >                  and factored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          Specifies the form of the system of equations: */
 /* >          = 'N':  A * X = B     (No transpose) */
 /* >          = 'T':  A**T * X = B  (Transpose) */
 /* >          = 'C':  A**H * X = B  (Conjugate transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DL */
 /* > \verbatim */
 /* >          DL is COMPLEX array, dimension (N-1) */
 /* >          The (n-1) subdiagonal elements of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] D */
 /* > \verbatim */
 /* >          D is COMPLEX array, dimension (N) */
 /* >          The n diagonal elements of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU */
 /* > \verbatim */
 /* >          DU is COMPLEX array, dimension (N-1) */
 /* >          The (n-1) superdiagonal elements of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DLF */
 /* > \verbatim */
 /* >          DLF is COMPLEX array, dimension (N-1) */
 /* >          If FACT = 'F', then DLF is an input argument and on entry */
 /* >          contains the (n-1) multipliers that define the matrix L from */
 /* >          the LU factorization of A as computed by CGTTRF. */
 /* > */
 /* >          If FACT = 'N', then DLF is an output argument and on exit */
 /* >          contains the (n-1) multipliers that define the matrix L from */
 /* >          the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DF */
 /* > \verbatim */
 /* >          DF is COMPLEX array, dimension (N) */
 /* >          If FACT = 'F', then DF is an input argument and on entry */
 /* >          contains the n diagonal elements of the upper triangular */
 /* >          matrix U from the LU factorization of A. */
 /* > */
 /* >          If FACT = 'N', then DF is an output argument and on exit */
 /* >          contains the n diagonal elements of the upper triangular */
 /* >          matrix U from the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DUF */
 /* > \verbatim */
 /* >          DUF is COMPLEX array, dimension (N-1) */
 /* >          If FACT = 'F', then DUF is an input argument and on entry */
 /* >          contains the (n-1) elements of the first superdiagonal of U. */
 /* > */
 /* >          If FACT = 'N', then DUF is an output argument and on exit */
 /* >          contains the (n-1) elements of the first superdiagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DU2 */
 /* > \verbatim */
 /* >          DU2 is COMPLEX array, dimension (N-2) */
 /* >          If FACT = 'F', then DU2 is an input argument and on entry */
 /* >          contains the (n-2) elements of the second superdiagonal of */
 /* >          U. */
 /* > */
 /* >          If FACT = 'N', then DU2 is an output argument and on exit */
 /* >          contains the (n-2) elements of the second superdiagonal of */
 /* >          U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          If FACT = 'F', then IPIV is an input argument and on entry */
 /* >          contains the pivot indices from the LU factorization of A as */
 /* >          computed by CGTTRF. */
 /* > */
 /* >          If FACT = 'N', then IPIV is an output argument and on exit */
 /* >          contains the pivot indices from the LU factorization of A; */
 /* >          row i of the matrix was interchanged with row IPIV(i). */
 /* >          IPIV(i) will always be either i or i+1; IPIV(i) = i indicates */
 /* >          a row interchange was not required. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,NRHS) */
 /* >          The N-by-NRHS right hand side matrix B. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] X */
 /* > \verbatim */
 /* >          X is COMPLEX array, dimension (LDX,NRHS) */
 /* >          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDX */
 /* > \verbatim */
 /* >          LDX is INTEGER */
 /* >          The leading dimension of the array X.  LDX >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is REAL */
 /* >          The estimate of the reciprocal condition number of the matrix */
 /* >          A.  If RCOND is less than the machine precision (in */
 /* >          particular, if RCOND = 0), the matrix is singular to working */
 /* >          precision.  This condition is indicated by a return code of */
 /* >          INFO > 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] FERR */
 /* > \verbatim */
 /* >          FERR is REAL array, dimension (NRHS) */
 /* >          The estimated forward error bound for each solution vector */
 /* >          X(j) (the j-th column of the solution matrix X). */
 /* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
 /* >          is an estimated upper bound for the magnitude of the largest */
 /* >          element in (X(j) - XTRUE) divided by the magnitude of the */
 /* >          largest element in X(j).  The estimate is as reliable as */
 /* >          the estimate for RCOND, and is almost always a slight */
 /* >          overestimate of the true error. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] BERR */
 /* > \verbatim */
 /* >          BERR is REAL array, dimension (NRHS) */
 /* >          The componentwise relative backward error of each solution */
 /* >          vector X(j) (i.e., the smallest relative change in */
 /* >          any element of A or B that makes X(j) an exact solution). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, and i is */
 /* >                <= N:  U(i,i) is exactly zero.  The factorization */
 /* >                       has not been completed unless i = N, but the */
 /* >                       factor U is exactly singular, so the solution */
 /* >                       and error bounds could not be computed. */
 /* >                       RCOND = 0 is returned. */
 /* >                = N+1: U is nonsingular, but RCOND is less than machine */
 /* >                       precision, meaning that the matrix is singular */
 /* >                       to working precision.  Nevertheless, the */
 /* >                       solution and error bounds are computed because */
 /* >                       there are a number of situations where the */
 /* >                       computed solution can be more accurate than the */
 /* >                       value of RCOND would suggest. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexGTsolve */

 /*  ===================================================================== */
 /* Subroutine */ int cgtsvx_(char *fact, char *trans, integer *n, integer *
 	nrhs, complex *dl, complex *d__, complex *du, complex *dlf, complex *
 	df, complex *duf, complex *du2, integer *ipiv, complex *b, integer *
 	ldb, complex *x, integer *ldx, real *rcond, real *ferr, real *berr, 
 	complex *work, real *rwork, integer *info)
 {
    /* System generated locals */
    integer b_dim1, b_offset, x_dim1, x_offset, i__1;

    /* Local variables */
    char norm[1];
    extern logical lsame_(char *, char *);
    real anorm;
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
 	    complex *, integer *);
    extern real slamch_(char *), clangt_(char *, integer *, complex *,
 	     complex *, complex *);
    logical nofact;
    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex 
 	    *, integer *, complex *, integer *), cgtcon_(char *, 
 	    integer *, complex *, complex *, complex *, complex *, integer *, 
 	    real *, real *, complex *, integer *), xerbla_(char *, 
 	    integer *, ftnlen), cgtrfs_(char *, integer *, integer *, complex 
 	    *, complex *, complex *, complex *, complex *, complex *, complex 
 	    *, integer *, complex *, integer *, complex *, integer *, real *, 
 	    real *, complex *, real *, integer *), cgttrf_(integer *, 
 	    complex *, complex *, complex *, complex *, integer *, integer *);
    logical notran;
    extern /* Subroutine */ int cgttrs_(char *, integer *, integer *, complex 
 	    *, complex *, complex *, complex *, integer *, complex *, integer 
 	    *, integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


    /* Parameter adjustments */
    --dl;
    --d__;
    --du;
    --dlf;
    --df;
    --duf;
    --du2;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --rwork;

    /* Function Body */
    *info = 0;
    nofact = lsame_(fact, "N");
    notran = lsame_(trans, "N");
    if (! nofact && ! lsame_(fact, "F")) {
 	*info = -1;
    } else if (! notran && ! lsame_(trans, "T") && ! 
 	    lsame_(trans, "C")) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*nrhs < 0) {
 	*info = -4;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -14;
    } else if (*ldx < f2cmax(1,*n)) {
 	*info = -16;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CGTSVX", &i__1, (ftnlen)6);
 	return 0;
    }

    if (nofact) {

 /*        Compute the LU factorization of A. */

 	ccopy_(n, &d__[1], &c__1, &df[1], &c__1);
 	if (*n > 1) {
 	    i__1 = *n - 1;
 	    ccopy_(&i__1, &dl[1], &c__1, &dlf[1], &c__1);
 	    i__1 = *n - 1;
 	    ccopy_(&i__1, &du[1], &c__1, &duf[1], &c__1);
 	}
 	cgttrf_(n, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], info);

 /*        Return if INFO is non-zero. */

 	if (*info > 0) {
 	    *rcond = 0.f;
 	    return 0;
 	}
    }

 /*     Compute the norm of the matrix A. */

    if (notran) {
 	*(unsigned char *)norm = '1';
    } else {
 	*(unsigned char *)norm = 'I';
    }
    anorm = clangt_(norm, n, &dl[1], &d__[1], &du[1]);

 /*     Compute the reciprocal of the condition number of A. */

    cgtcon_(norm, n, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &anorm, 
 	    rcond, &work[1], info);

 /*     Compute the solution vectors X. */

    clacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx);
    cgttrs_(trans, n, nrhs, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &x[
 	    x_offset], ldx, info);

 /*     Use iterative refinement to improve the computed solutions and */
 /*     compute error bounds and backward error estimates for them. */

    cgtrfs_(trans, n, nrhs, &dl[1], &d__[1], &du[1], &dlf[1], &df[1], &duf[1],
 	     &du2[1], &ipiv[1], &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1]
 	    , &berr[1], &work[1], &rwork[1], info);

 /*     Set INFO = N+1 if the matrix is singular to working precision. */

    if (*rcond < slamch_("Epsilon")) {
 	*info = *n + 1;
    }

    return 0;

 /*     End of CGTSVX */

 } /* cgtsvx_ */

--- a/lapack-netlib/SRC/cgttrf.c
+++ b/lapack-netlib/SRC/cgttrf.c
@@ -0,0 +1,695 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b CGTTRF */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CGTTRF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgttrf.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgttrf.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgttrf.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) */

 /*       INTEGER            INFO, N */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX            D( * ), DL( * ), DU( * ), DU2( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CGTTRF computes an LU factorization of a complex tridiagonal matrix A */
 /* > using elimination with partial pivoting and row interchanges. */
 /* > */
 /* > The factorization has the form */
 /* >    A = L * U */
 /* > where L is a product of permutation and unit lower bidiagonal */
 /* > matrices and U is upper triangular with nonzeros in only the main */
 /* > diagonal and first two superdiagonals. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DL */
 /* > \verbatim */
 /* >          DL is COMPLEX array, dimension (N-1) */
 /* >          On entry, DL must contain the (n-1) sub-diagonal elements of */
 /* >          A. */
 /* > */
 /* >          On exit, DL is overwritten by the (n-1) multipliers that */
 /* >          define the matrix L from the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] D */
 /* > \verbatim */
 /* >          D is COMPLEX array, dimension (N) */
 /* >          On entry, D must contain the diagonal elements of A. */
 /* > */
 /* >          On exit, D is overwritten by the n diagonal elements of the */
 /* >          upper triangular matrix U from the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DU */
 /* > \verbatim */
 /* >          DU is COMPLEX array, dimension (N-1) */
 /* >          On entry, DU must contain the (n-1) super-diagonal elements */
 /* >          of A. */
 /* > */
 /* >          On exit, DU is overwritten by the (n-1) elements of the first */
 /* >          super-diagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] DU2 */
 /* > \verbatim */
 /* >          DU2 is COMPLEX array, dimension (N-2) */
 /* >          On exit, DU2 is overwritten by the (n-2) elements of the */
 /* >          second super-diagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices; for 1 <= i <= n, row i of the matrix was */
 /* >          interchanged with row IPIV(i).  IPIV(i) will always be either */
 /* >          i or i+1; IPIV(i) = i indicates a row interchange was not */
 /* >          required. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -k, the k-th argument had an illegal value */
 /* >          > 0:  if INFO = k, U(k,k) is exactly zero. The factorization */
 /* >                has been completed, but the factor U is exactly */
 /* >                singular, and division by zero will occur if it is used */
 /* >                to solve a system of equations. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexGTcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int cgttrf_(integer *n, complex *dl, complex *d__, complex *
 	du, complex *du2, integer *ipiv, integer *info)
 {
    /* System generated locals */
    integer i__1, i__2, i__3, i__4;
    real r__1, r__2, r__3, r__4;
    complex q__1, q__2;

    /* Local variables */
    complex fact, temp;
    integer i__;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


    /* Parameter adjustments */
    --ipiv;
    --du2;
    --du;
    --d__;
    --dl;

    /* Function Body */
    *info = 0;
    if (*n < 0) {
 	*info = -1;
 	i__1 = -(*info);
 	xerbla_("CGTTRF", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Initialize IPIV(i) = i and DU2(i) = 0 */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	ipiv[i__] = i__;
 /* L10: */
    }
    i__1 = *n - 2;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	i__2 = i__;
 	du2[i__2].r = 0.f, du2[i__2].i = 0.f;
 /* L20: */
    }

    i__1 = *n - 2;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	i__2 = i__;
 	i__3 = i__;
 	if ((r__1 = d__[i__2].r, abs(r__1)) + (r__2 = r_imag(&d__[i__]), abs(
 		r__2)) >= (r__3 = dl[i__3].r, abs(r__3)) + (r__4 = r_imag(&dl[
 		i__]), abs(r__4))) {

 /*           No row interchange required, eliminate DL(I) */

 	    i__2 = i__;
 	    if ((r__1 = d__[i__2].r, abs(r__1)) + (r__2 = r_imag(&d__[i__]), 
 		    abs(r__2)) != 0.f) {
 		c_div(&q__1, &dl[i__], &d__[i__]);
 		fact.r = q__1.r, fact.i = q__1.i;
 		i__2 = i__;
 		dl[i__2].r = fact.r, dl[i__2].i = fact.i;
 		i__2 = i__ + 1;
 		i__3 = i__ + 1;
 		i__4 = i__;
 		q__2.r = fact.r * du[i__4].r - fact.i * du[i__4].i, q__2.i = 
 			fact.r * du[i__4].i + fact.i * du[i__4].r;
 		q__1.r = d__[i__3].r - q__2.r, q__1.i = d__[i__3].i - q__2.i;
 		d__[i__2].r = q__1.r, d__[i__2].i = q__1.i;
 	    }
 	} else {

 /*           Interchange rows I and I+1, eliminate DL(I) */

 	    c_div(&q__1, &d__[i__], &dl[i__]);
 	    fact.r = q__1.r, fact.i = q__1.i;
 	    i__2 = i__;
 	    i__3 = i__;
 	    d__[i__2].r = dl[i__3].r, d__[i__2].i = dl[i__3].i;
 	    i__2 = i__;
 	    dl[i__2].r = fact.r, dl[i__2].i = fact.i;
 	    i__2 = i__;
 	    temp.r = du[i__2].r, temp.i = du[i__2].i;
 	    i__2 = i__;
 	    i__3 = i__ + 1;
 	    du[i__2].r = d__[i__3].r, du[i__2].i = d__[i__3].i;
 	    i__2 = i__ + 1;
 	    i__3 = i__ + 1;
 	    q__2.r = fact.r * d__[i__3].r - fact.i * d__[i__3].i, q__2.i = 
 		    fact.r * d__[i__3].i + fact.i * d__[i__3].r;
 	    q__1.r = temp.r - q__2.r, q__1.i = temp.i - q__2.i;
 	    d__[i__2].r = q__1.r, d__[i__2].i = q__1.i;
 	    i__2 = i__;
 	    i__3 = i__ + 1;
 	    du2[i__2].r = du[i__3].r, du2[i__2].i = du[i__3].i;
 	    i__2 = i__ + 1;
 	    q__2.r = -fact.r, q__2.i = -fact.i;
 	    i__3 = i__ + 1;
 	    q__1.r = q__2.r * du[i__3].r - q__2.i * du[i__3].i, q__1.i = 
 		    q__2.r * du[i__3].i + q__2.i * du[i__3].r;
 	    du[i__2].r = q__1.r, du[i__2].i = q__1.i;
 	    ipiv[i__] = i__ + 1;
 	}
 /* L30: */
    }
    if (*n > 1) {
 	i__ = *n - 1;
 	i__1 = i__;
 	i__2 = i__;
 	if ((r__1 = d__[i__1].r, abs(r__1)) + (r__2 = r_imag(&d__[i__]), abs(
 		r__2)) >= (r__3 = dl[i__2].r, abs(r__3)) + (r__4 = r_imag(&dl[
 		i__]), abs(r__4))) {
 	    i__1 = i__;
 	    if ((r__1 = d__[i__1].r, abs(r__1)) + (r__2 = r_imag(&d__[i__]), 
 		    abs(r__2)) != 0.f) {
 		c_div(&q__1, &dl[i__], &d__[i__]);
 		fact.r = q__1.r, fact.i = q__1.i;
 		i__1 = i__;
 		dl[i__1].r = fact.r, dl[i__1].i = fact.i;
 		i__1 = i__ + 1;
 		i__2 = i__ + 1;
 		i__3 = i__;
 		q__2.r = fact.r * du[i__3].r - fact.i * du[i__3].i, q__2.i = 
 			fact.r * du[i__3].i + fact.i * du[i__3].r;
 		q__1.r = d__[i__2].r - q__2.r, q__1.i = d__[i__2].i - q__2.i;
 		d__[i__1].r = q__1.r, d__[i__1].i = q__1.i;
 	    }
 	} else {
 	    c_div(&q__1, &d__[i__], &dl[i__]);
 	    fact.r = q__1.r, fact.i = q__1.i;
 	    i__1 = i__;
 	    i__2 = i__;
 	    d__[i__1].r = dl[i__2].r, d__[i__1].i = dl[i__2].i;
 	    i__1 = i__;
 	    dl[i__1].r = fact.r, dl[i__1].i = fact.i;
 	    i__1 = i__;
 	    temp.r = du[i__1].r, temp.i = du[i__1].i;
 	    i__1 = i__;
 	    i__2 = i__ + 1;
 	    du[i__1].r = d__[i__2].r, du[i__1].i = d__[i__2].i;
 	    i__1 = i__ + 1;
 	    i__2 = i__ + 1;
 	    q__2.r = fact.r * d__[i__2].r - fact.i * d__[i__2].i, q__2.i = 
 		    fact.r * d__[i__2].i + fact.i * d__[i__2].r;
 	    q__1.r = temp.r - q__2.r, q__1.i = temp.i - q__2.i;
 	    d__[i__1].r = q__1.r, d__[i__1].i = q__1.i;
 	    ipiv[i__] = i__ + 1;
 	}
    }

 /*     Check for a zero on the diagonal of U. */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	i__2 = i__;
 	if ((r__1 = d__[i__2].r, abs(r__1)) + (r__2 = r_imag(&d__[i__]), abs(
 		r__2)) == 0.f) {
 	    *info = i__;
 	    goto L50;
 	}
 /* L40: */
    }
 L50:

    return 0;

 /*     End of CGTTRF */

 } /* cgttrf_ */

--- a/lapack-netlib/SRC/cgttrs.c
+++ b/lapack-netlib/SRC/cgttrs.c
@@ -0,0 +1,637 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b CGTTRS */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CGTTRS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cgttrs.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cgttrs.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cgttrs.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, */
 /*                          INFO ) */

 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, LDB, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX            B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CGTTRS solves one of the systems of equations */
 /* >    A * X = B,  A**T * X = B,  or  A**H * X = B, */
 /* > with a tridiagonal matrix A using the LU factorization computed */
 /* > by CGTTRF. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          Specifies the form of the system of equations. */
 /* >          = 'N':  A * X = B     (No transpose) */
 /* >          = 'T':  A**T * X = B  (Transpose) */
 /* >          = 'C':  A**H * X = B  (Conjugate transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DL */
 /* > \verbatim */
 /* >          DL is COMPLEX array, dimension (N-1) */
 /* >          The (n-1) multipliers that define the matrix L from the */
 /* >          LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] D */
 /* > \verbatim */
 /* >          D is COMPLEX array, dimension (N) */
 /* >          The n diagonal elements of the upper triangular matrix U from */
 /* >          the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU */
 /* > \verbatim */
 /* >          DU is COMPLEX array, dimension (N-1) */
 /* >          The (n-1) elements of the first super-diagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU2 */
 /* > \verbatim */
 /* >          DU2 is COMPLEX array, dimension (N-2) */
 /* >          The (n-2) elements of the second super-diagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices; for 1 <= i <= n, row i of the matrix was */
 /* >          interchanged with row IPIV(i).  IPIV(i) will always be either */
 /* >          i or i+1; IPIV(i) = i indicates a row interchange was not */
 /* >          required. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,NRHS) */
 /* >          On entry, the matrix of right hand side vectors B. */
 /* >          On exit, B is overwritten by the solution vectors X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -k, the k-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexGTcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int cgttrs_(char *trans, integer *n, integer *nrhs, complex *
 	dl, complex *d__, complex *du, complex *du2, integer *ipiv, complex *
 	b, integer *ldb, integer *info)
 {
    /* System generated locals */
    integer b_dim1, b_offset, i__1, i__2, i__3;

    /* Local variables */
    integer j;
    extern /* Subroutine */ int cgtts2_(integer *, integer *, integer *, 
 	    complex *, complex *, complex *, complex *, integer *, complex *, 
 	    integer *);
    integer jb, nb;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer itrans;
    logical notran;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


    /* Parameter adjustments */
    --dl;
    --d__;
    --du;
    --du2;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    notran = *(unsigned char *)trans == 'N' || *(unsigned char *)trans == 'n';
    if (! notran && ! (*(unsigned char *)trans == 'T' || *(unsigned char *)
 	    trans == 't') && ! (*(unsigned char *)trans == 'C' || *(unsigned 
 	    char *)trans == 'c')) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*ldb < f2cmax(*n,1)) {
 	*info = -10;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CGTTRS", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
 	return 0;
    }

 /*     Decode TRANS */

    if (notran) {
 	itrans = 0;
    } else if (*(unsigned char *)trans == 'T' || *(unsigned char *)trans == 
 	    't') {
 	itrans = 1;
    } else {
 	itrans = 2;
    }

 /*     Determine the number of right-hand sides to solve at a time. */

    if (*nrhs == 1) {
 	nb = 1;
    } else {
 /* Computing MAX */
 	i__1 = 1, i__2 = ilaenv_(&c__1, "CGTTRS", trans, n, nrhs, &c_n1, &
 		c_n1, (ftnlen)6, (ftnlen)1);
 	nb = f2cmax(i__1,i__2);
    }

    if (nb >= *nrhs) {
 	cgtts2_(&itrans, n, nrhs, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[1], 
 		&b[b_offset], ldb);
    } else {
 	i__1 = *nrhs;
 	i__2 = nb;
 	for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
 /* Computing MIN */
 	    i__3 = *nrhs - j + 1;
 	    jb = f2cmin(i__3,nb);
 	    cgtts2_(&itrans, n, &jb, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[
 		    1], &b[j * b_dim1 + 1], ldb);
 /* L10: */
 	}
    }

 /*     End of CGTTRS */

    return 0;
 } /* cgttrs_ */

--- a/lapack-netlib/SRC/cgtts2.c
+++ b/lapack-netlib/SRC/cgtts2.c
--- a/lapack-netlib/SRC/chb2st_kernels.c
+++ b/lapack-netlib/SRC/chb2st_kernels.c
@@ -0,0 +1,801 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b CHB2ST_KERNELS */

 /*  @generated from zhb2st_kernels.f, fortran z -> c, Wed Dec  7 08:22:40 2016 */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHB2ST_KERNELS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chb2st_
 kernels.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chb2st_
 kernels.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chb2st_
 kernels.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE  CHB2ST_KERNELS( UPLO, WANTZ, TTYPE, */
 /*                                   ST, ED, SWEEP, N, NB, IB, */
 /*                                   A, LDA, V, TAU, LDVT, WORK) */

 /*       IMPLICIT NONE */

 /*       CHARACTER          UPLO */
 /*       LOGICAL            WANTZ */
 /*       INTEGER            TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT */
 /*       COMPLEX            A( LDA, * ), V( * ), */
 /*                          TAU( * ), WORK( * ) */

 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHB2ST_KERNELS is an internal routine used by the CHETRD_HB2ST */
 /* > subroutine. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* > \endverbatim */
 /* > */
 /* > \param[in] WANTZ */
 /* > \verbatim */
 /* >          WANTZ is LOGICAL which indicate if Eigenvalue are requested or both */
 /* >          Eigenvalue/Eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TTYPE */
 /* > \verbatim */
 /* >          TTYPE is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ST */
 /* > \verbatim */
 /* >          ST is INTEGER */
 /* >          internal parameter for indices. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ED */
 /* > \verbatim */
 /* >          ED is INTEGER */
 /* >          internal parameter for indices. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] SWEEP */
 /* > \verbatim */
 /* >          SWEEP is INTEGER */
 /* >          internal parameter for indices. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER. The order of the matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NB */
 /* > \verbatim */
 /* >          NB is INTEGER. The size of the band. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IB */
 /* > \verbatim */
 /* >          IB is INTEGER. */
 /* > \endverbatim */
 /* > */
 /* > \param[in, out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array. A pointer to the matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER. The leading dimension of the matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] V */
 /* > \verbatim */
 /* >          V is COMPLEX array, dimension 2*n if eigenvalues only are */
 /* >          requested or to be queried for vectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is COMPLEX array, dimension (2*n). */
 /* >          The scalar factors of the Householder reflectors are stored */
 /* >          in this array. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDVT */
 /* > \verbatim */
 /* >          LDVT is INTEGER. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array. Workspace of size nb. */
 /* > \endverbatim */
 /* > */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  Implemented by Azzam Haidar. */
 /* > */
 /* >  All details are available on technical report, SC11, SC13 papers. */
 /* > */
 /* >  Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */
 /* >  Parallel reduction to condensed forms for symmetric eigenvalue problems */
 /* >  using aggregated fine-grained and memory-aware kernels. In Proceedings */
 /* >  of 2011 International Conference for High Performance Computing, */
 /* >  Networking, Storage and Analysis (SC '11), New York, NY, USA, */
 /* >  Article 8 , 11 pages. */
 /* >  http://doi.acm.org/10.1145/2063384.2063394 */
 /* > */
 /* >  A. Haidar, J. Kurzak, P. Luszczek, 2013. */
 /* >  An improved parallel singular value algorithm and its implementation */
 /* >  for multicore hardware, In Proceedings of 2013 International Conference */
 /* >  for High Performance Computing, Networking, Storage and Analysis (SC '13). */
 /* >  Denver, Colorado, USA, 2013. */
 /* >  Article 90, 12 pages. */
 /* >  http://doi.acm.org/10.1145/2503210.2503292 */
 /* > */
 /* >  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */
 /* >  A novel hybrid CPU-GPU generalized eigensolver for electronic structure */
 /* >  calculations based on fine-grained memory aware tasks. */
 /* >  International Journal of High Performance Computing Applications. */
 /* >  Volume 28 Issue 2, Pages 196-209, May 2014. */
 /* >  http://hpc.sagepub.com/content/28/2/196 */
 /* > */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int chb2st_kernels_(char *uplo, logical *wantz, integer *
 	ttype, integer *st, integer *ed, integer *sweep, integer *n, integer *
 	nb, integer *ib, complex *a, integer *lda, complex *v, complex *tau, 
 	integer *ldvt, complex *work)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    complex q__1;

    /* Local variables */
    complex ctmp;
    integer dpos, vpos, i__;
    extern logical lsame_(char *, char *);
    logical upper;
    integer j1, j2, lm, ln;
    extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, 
 	    integer *, complex *);
    integer ajeter;
    extern /* Subroutine */ int clarfx_(char *, integer *, integer *, complex 
 	    *, complex *, complex *, integer *, complex *), clarfy_(
 	    char *, integer *, complex *, integer *, complex *, complex *, 
 	    integer *, complex *);
    integer ofdpos, taupos;



 /*  -- LAPACK computational routine (version 3.7.1) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2017 */


 /*  ===================================================================== */


    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --v;
    --tau;
    --work;

    /* Function Body */
    ajeter = *ib + *ldvt;
    upper = lsame_(uplo, "U");
    if (upper) {
 	dpos = (*nb << 1) + 1;
 	ofdpos = *nb << 1;
    } else {
 	dpos = 1;
 	ofdpos = 2;
    }

 /*     Upper case */

    if (upper) {

 	if (*wantz) {
 	    vpos = (*sweep - 1) % 2 * *n + *st;
 	    taupos = (*sweep - 1) % 2 * *n + *st;
 	} else {
 	    vpos = (*sweep - 1) % 2 * *n + *st;
 	    taupos = (*sweep - 1) % 2 * *n + *st;
 	}

 	if (*ttype == 1) {
 	    lm = *ed - *st + 1;

 	    i__1 = vpos;
 	    v[i__1].r = 1.f, v[i__1].i = 0.f;
 	    i__1 = lm - 1;
 	    for (i__ = 1; i__ <= i__1; ++i__) {
 		i__2 = vpos + i__;
 		r_cnjg(&q__1, &a[ofdpos - i__ + (*st + i__) * a_dim1]);
 		v[i__2].r = q__1.r, v[i__2].i = q__1.i;
 		i__2 = ofdpos - i__ + (*st + i__) * a_dim1;
 		a[i__2].r = 0.f, a[i__2].i = 0.f;
 /* L10: */
 	    }
 	    r_cnjg(&q__1, &a[ofdpos + *st * a_dim1]);
 	    ctmp.r = q__1.r, ctmp.i = q__1.i;
 	    clarfg_(&lm, &ctmp, &v[vpos + 1], &c__1, &tau[taupos]);
 	    i__1 = ofdpos + *st * a_dim1;
 	    a[i__1].r = ctmp.r, a[i__1].i = ctmp.i;

 	    lm = *ed - *st + 1;
 	    r_cnjg(&q__1, &tau[taupos]);
 	    i__1 = *lda - 1;
 	    clarfy_(uplo, &lm, &v[vpos], &c__1, &q__1, &a[dpos + *st * a_dim1]
 		    , &i__1, &work[1]);
 	}

 	if (*ttype == 3) {

 	    lm = *ed - *st + 1;
 	    r_cnjg(&q__1, &tau[taupos]);
 	    i__1 = *lda - 1;
 	    clarfy_(uplo, &lm, &v[vpos], &c__1, &q__1, &a[dpos + *st * a_dim1]
 		    , &i__1, &work[1]);
 	}

 	if (*ttype == 2) {
 	    j1 = *ed + 1;
 /* Computing MIN */
 	    i__1 = *ed + *nb;
 	    j2 = f2cmin(i__1,*n);
 	    ln = *ed - *st + 1;
 	    lm = j2 - j1 + 1;
 	    if (lm > 0) {
 		r_cnjg(&q__1, &tau[taupos]);
 		i__1 = *lda - 1;
 		clarfx_("Left", &ln, &lm, &v[vpos], &q__1, &a[dpos - *nb + j1 
 			* a_dim1], &i__1, &work[1]);

 		if (*wantz) {
 		    vpos = (*sweep - 1) % 2 * *n + j1;
 		    taupos = (*sweep - 1) % 2 * *n + j1;
 		} else {
 		    vpos = (*sweep - 1) % 2 * *n + j1;
 		    taupos = (*sweep - 1) % 2 * *n + j1;
 		}

 		i__1 = vpos;
 		v[i__1].r = 1.f, v[i__1].i = 0.f;
 		i__1 = lm - 1;
 		for (i__ = 1; i__ <= i__1; ++i__) {
 		    i__2 = vpos + i__;
 		    r_cnjg(&q__1, &a[dpos - *nb - i__ + (j1 + i__) * a_dim1]);
 		    v[i__2].r = q__1.r, v[i__2].i = q__1.i;
 		    i__2 = dpos - *nb - i__ + (j1 + i__) * a_dim1;
 		    a[i__2].r = 0.f, a[i__2].i = 0.f;
 /* L30: */
 		}
 		r_cnjg(&q__1, &a[dpos - *nb + j1 * a_dim1]);
 		ctmp.r = q__1.r, ctmp.i = q__1.i;
 		clarfg_(&lm, &ctmp, &v[vpos + 1], &c__1, &tau[taupos]);
 		i__1 = dpos - *nb + j1 * a_dim1;
 		a[i__1].r = ctmp.r, a[i__1].i = ctmp.i;

 		i__1 = ln - 1;
 		i__2 = *lda - 1;
 		clarfx_("Right", &i__1, &lm, &v[vpos], &tau[taupos], &a[dpos 
 			- *nb + 1 + j1 * a_dim1], &i__2, &work[1]);
 	    }
 	}

 /*     Lower case */

    } else {

 	if (*wantz) {
 	    vpos = (*sweep - 1) % 2 * *n + *st;
 	    taupos = (*sweep - 1) % 2 * *n + *st;
 	} else {
 	    vpos = (*sweep - 1) % 2 * *n + *st;
 	    taupos = (*sweep - 1) % 2 * *n + *st;
 	}

 	if (*ttype == 1) {
 	    lm = *ed - *st + 1;

 	    i__1 = vpos;
 	    v[i__1].r = 1.f, v[i__1].i = 0.f;
 	    i__1 = lm - 1;
 	    for (i__ = 1; i__ <= i__1; ++i__) {
 		i__2 = vpos + i__;
 		i__3 = ofdpos + i__ + (*st - 1) * a_dim1;
 		v[i__2].r = a[i__3].r, v[i__2].i = a[i__3].i;
 		i__2 = ofdpos + i__ + (*st - 1) * a_dim1;
 		a[i__2].r = 0.f, a[i__2].i = 0.f;
 /* L20: */
 	    }
 	    clarfg_(&lm, &a[ofdpos + (*st - 1) * a_dim1], &v[vpos + 1], &c__1,
 		     &tau[taupos]);

 	    lm = *ed - *st + 1;

 	    r_cnjg(&q__1, &tau[taupos]);
 	    i__1 = *lda - 1;
 	    clarfy_(uplo, &lm, &v[vpos], &c__1, &q__1, &a[dpos + *st * a_dim1]
 		    , &i__1, &work[1]);
 	}

 	if (*ttype == 3) {
 	    lm = *ed - *st + 1;

 	    r_cnjg(&q__1, &tau[taupos]);
 	    i__1 = *lda - 1;
 	    clarfy_(uplo, &lm, &v[vpos], &c__1, &q__1, &a[dpos + *st * a_dim1]
 		    , &i__1, &work[1]);
 	}

 	if (*ttype == 2) {
 	    j1 = *ed + 1;
 /* Computing MIN */
 	    i__1 = *ed + *nb;
 	    j2 = f2cmin(i__1,*n);
 	    ln = *ed - *st + 1;
 	    lm = j2 - j1 + 1;

 	    if (lm > 0) {
 		i__1 = *lda - 1;
 		clarfx_("Right", &lm, &ln, &v[vpos], &tau[taupos], &a[dpos + *
 			nb + *st * a_dim1], &i__1, &work[1]);

 		if (*wantz) {
 		    vpos = (*sweep - 1) % 2 * *n + j1;
 		    taupos = (*sweep - 1) % 2 * *n + j1;
 		} else {
 		    vpos = (*sweep - 1) % 2 * *n + j1;
 		    taupos = (*sweep - 1) % 2 * *n + j1;
 		}

 		i__1 = vpos;
 		v[i__1].r = 1.f, v[i__1].i = 0.f;
 		i__1 = lm - 1;
 		for (i__ = 1; i__ <= i__1; ++i__) {
 		    i__2 = vpos + i__;
 		    i__3 = dpos + *nb + i__ + *st * a_dim1;
 		    v[i__2].r = a[i__3].r, v[i__2].i = a[i__3].i;
 		    i__2 = dpos + *nb + i__ + *st * a_dim1;
 		    a[i__2].r = 0.f, a[i__2].i = 0.f;
 /* L40: */
 		}
 		clarfg_(&lm, &a[dpos + *nb + *st * a_dim1], &v[vpos + 1], &
 			c__1, &tau[taupos]);

 		i__1 = ln - 1;
 		r_cnjg(&q__1, &tau[taupos]);
 		i__2 = *lda - 1;
 		clarfx_("Left", &lm, &i__1, &v[vpos], &q__1, &a[dpos + *nb - 
 			1 + (*st + 1) * a_dim1], &i__2, &work[1]);
 	    }
 	}
    }

    return 0;

 /*     END OF CHB2ST_KERNELS */

 } /* chb2st_kernels__ */

--- a/lapack-netlib/SRC/chbev.c
+++ b/lapack-netlib/SRC/chbev.c
@@ -0,0 +1,711 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static real c_b11 = 1.f;
 static integer c__1 = 1;

 /* > \brief <b> CHBEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER m
 atrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHBEV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chbev.f
 "> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chbev.f
 "> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chbev.f
 "> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, */
 /*                         RWORK, INFO ) */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, KD, LDAB, LDZ, N */
 /*       REAL               RWORK( * ), W( * ) */
 /*       COMPLEX            AB( LDAB, * ), WORK( * ), Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHBEV computes all the eigenvalues and, optionally, eigenvectors of */
 /* > a complex Hermitian band matrix A. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KD */
 /* > \verbatim */
 /* >          KD is INTEGER */
 /* >          The number of superdiagonals of the matrix A if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'.  KD >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AB */
 /* > \verbatim */
 /* >          AB is COMPLEX array, dimension (LDAB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix A, stored in the first KD+1 rows of the array.  The */
 /* >          j-th column of A is stored in the j-th column of the array AB */
 /* >          as follows: */
 /* >          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */
 /* >          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=f2cmin(n,j+kd). */
 /* > */
 /* >          On exit, AB is overwritten by values generated during the */
 /* >          reduction to tridiagonal form.  If UPLO = 'U', the first */
 /* >          superdiagonal and the diagonal of the tridiagonal matrix T */
 /* >          are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */
 /* >          the diagonal and first subdiagonal of T are returned in the */
 /* >          first two rows of AB. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KD + 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is REAL array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX array, dimension (LDZ, N) */
 /* >          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */
 /* >          eigenvectors of the matrix A, with the i-th column of Z */
 /* >          holding the eigenvector associated with W(i). */
 /* >          If JOBZ = 'N', then Z is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z.  LDZ >= 1, and if */
 /* >          JOBZ = 'V', LDZ >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, dimension (f2cmax(1,3*N-2)) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          > 0:  if INFO = i, the algorithm failed to converge; i */
 /* >                off-diagonal elements of an intermediate tridiagonal */
 /* >                form did not converge to zero. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexOTHEReigen */

 /*  ===================================================================== */
 /* Subroutine */ int chbev_(char *jobz, char *uplo, integer *n, integer *kd, 
 	complex *ab, integer *ldab, real *w, complex *z__, integer *ldz, 
 	complex *work, real *rwork, integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, z_dim1, z_offset, i__1;
    real r__1;

    /* Local variables */
    integer inde;
    real anrm;
    integer imax;
    real rmin, rmax, sigma;
    extern logical lsame_(char *, char *);
    integer iinfo;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    logical lower, wantz;
    extern real clanhb_(char *, char *, integer *, integer *, complex *, 
 	    integer *, real *);
    integer iscale;
    extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, 
 	    real *, integer *, integer *, complex *, integer *, integer *), chbtrd_(char *, char *, integer *, integer *, complex *, 
 	    integer *, real *, real *, complex *, integer *, complex *, 
 	    integer *);
    extern real slamch_(char *);
    real safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    real bignum;
    integer indrwk;
    extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, 
 	    complex *, integer *, real *, integer *), ssterf_(integer 
 	    *, real *, real *, integer *);
    real smlnum, eps;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    lower = lsame_(uplo, "L");

    *info = 0;
    if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -1;
    } else if (! (lower || lsame_(uplo, "U"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*kd < 0) {
 	*info = -4;
    } else if (*ldab < *kd + 1) {
 	*info = -6;
    } else if (*ldz < 1 || wantz && *ldz < *n) {
 	*info = -9;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHBEV ", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

    if (*n == 1) {
 	if (lower) {
 	    i__1 = ab_dim1 + 1;
 	    w[1] = ab[i__1].r;
 	} else {
 	    i__1 = *kd + 1 + ab_dim1;
 	    w[1] = ab[i__1].r;
 	}
 	if (wantz) {
 	    i__1 = z_dim1 + 1;
 	    z__[i__1].r = 1.f, z__[i__1].i = 0.f;
 	}
 	return 0;
    }

 /*     Get machine constants. */

    safmin = slamch_("Safe minimum");
    eps = slamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1.f / smlnum;
    rmin = sqrt(smlnum);
    rmax = sqrt(bignum);

 /*     Scale matrix to allowable range, if necessary. */

    anrm = clanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]);
    iscale = 0;
    if (anrm > 0.f && anrm < rmin) {
 	iscale = 1;
 	sigma = rmin / anrm;
    } else if (anrm > rmax) {
 	iscale = 1;
 	sigma = rmax / anrm;
    }
    if (iscale == 1) {
 	if (lower) {
 	    clascl_("B", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, 
 		    info);
 	} else {
 	    clascl_("Q", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, 
 		    info);
 	}
    }

 /*     Call CHBTRD to reduce Hermitian band matrix to tridiagonal form. */

    inde = 1;
    chbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &rwork[inde], &
 	    z__[z_offset], ldz, &work[1], &iinfo);

 /*     For eigenvalues only, call SSTERF.  For eigenvectors, call CSTEQR. */

    if (! wantz) {
 	ssterf_(n, &w[1], &rwork[inde], info);
    } else {
 	indrwk = inde + *n;
 	csteqr_(jobz, n, &w[1], &rwork[inde], &z__[z_offset], ldz, &rwork[
 		indrwk], info);
    }

 /*     If matrix was scaled, then rescale eigenvalues appropriately. */

    if (iscale == 1) {
 	if (*info == 0) {
 	    imax = *n;
 	} else {
 	    imax = *info - 1;
 	}
 	r__1 = 1.f / sigma;
 	sscal_(&imax, &r__1, &w[1], &c__1);
    }

    return 0;

 /*     End of CHBEV */

 } /* chbev_ */

--- a/lapack-netlib/SRC/chbev_2stage.c
+++ b/lapack-netlib/SRC/chbev_2stage.c
@@ -0,0 +1,819 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__2 = 2;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__4 = 4;
 static real c_b21 = 1.f;
 static integer c__1 = 1;

 /* > \brief <b> CHBEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for 
 OTHER matrices</b> */

 /*  @generated from zhbev_2stage.f, fortran z -> c, Sat Nov  5 23:18:20 2016 */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHBEV_2STAGE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chbev_2
 stage.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chbev_2
 stage.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chbev_2
 stage.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, */
 /*                                WORK, LWORK, RWORK, INFO ) */

 /*       IMPLICIT NONE */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, KD, LDAB, LDZ, N, LWORK */
 /*       REAL               RWORK( * ), W( * ) */
 /*       COMPLEX            AB( LDAB, * ), WORK( * ), Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHBEV_2STAGE computes all the eigenvalues and, optionally, eigenvectors of */
 /* > a complex Hermitian band matrix A using the 2stage technique for */
 /* > the reduction to tridiagonal. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* >                  Not available in this release. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KD */
 /* > \verbatim */
 /* >          KD is INTEGER */
 /* >          The number of superdiagonals of the matrix A if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'.  KD >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AB */
 /* > \verbatim */
 /* >          AB is COMPLEX array, dimension (LDAB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix A, stored in the first KD+1 rows of the array.  The */
 /* >          j-th column of A is stored in the j-th column of the array AB */
 /* >          as follows: */
 /* >          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */
 /* >          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=f2cmin(n,j+kd). */
 /* > */
 /* >          On exit, AB is overwritten by values generated during the */
 /* >          reduction to tridiagonal form.  If UPLO = 'U', the first */
 /* >          superdiagonal and the diagonal of the tridiagonal matrix T */
 /* >          are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */
 /* >          the diagonal and first subdiagonal of T are returned in the */
 /* >          first two rows of AB. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KD + 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is REAL array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX array, dimension (LDZ, N) */
 /* >          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */
 /* >          eigenvectors of the matrix A, with the i-th column of Z */
 /* >          holding the eigenvector associated with W(i). */
 /* >          If JOBZ = 'N', then Z is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z.  LDZ >= 1, and if */
 /* >          JOBZ = 'V', LDZ >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension LWORK */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK. LWORK >= 1, when N <= 1; */
 /* >          otherwise */
 /* >          If JOBZ = 'N' and N > 1, LWORK must be queried. */
 /* >                                   LWORK = MAX(1, dimension) where */
 /* >                                   dimension = (2KD+1)*N + KD*NTHREADS */
 /* >                                   where KD is the size of the band. */
 /* >                                   NTHREADS is the number of threads used when */
 /* >                                   openMP compilation is enabled, otherwise =1. */
 /* >          If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal sizes of the WORK, RWORK and */
 /* >          IWORK arrays, returns these values as the first entries of */
 /* >          the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, dimension (f2cmax(1,3*N-2)) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          > 0:  if INFO = i, the algorithm failed to converge; i */
 /* >                off-diagonal elements of an intermediate tridiagonal */
 /* >                form did not converge to zero. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complexOTHEReigen */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  All details about the 2stage techniques are available in: */
 /* > */
 /* >  Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */
 /* >  Parallel reduction to condensed forms for symmetric eigenvalue problems */
 /* >  using aggregated fine-grained and memory-aware kernels. In Proceedings */
 /* >  of 2011 International Conference for High Performance Computing, */
 /* >  Networking, Storage and Analysis (SC '11), New York, NY, USA, */
 /* >  Article 8 , 11 pages. */
 /* >  http://doi.acm.org/10.1145/2063384.2063394 */
 /* > */
 /* >  A. Haidar, J. Kurzak, P. Luszczek, 2013. */
 /* >  An improved parallel singular value algorithm and its implementation */
 /* >  for multicore hardware, In Proceedings of 2013 International Conference */
 /* >  for High Performance Computing, Networking, Storage and Analysis (SC '13). */
 /* >  Denver, Colorado, USA, 2013. */
 /* >  Article 90, 12 pages. */
 /* >  http://doi.acm.org/10.1145/2503210.2503292 */
 /* > */
 /* >  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */
 /* >  A novel hybrid CPU-GPU generalized eigensolver for electronic structure */
 /* >  calculations based on fine-grained memory aware tasks. */
 /* >  International Journal of High Performance Computing Applications. */
 /* >  Volume 28 Issue 2, Pages 196-209, May 2014. */
 /* >  http://hpc.sagepub.com/content/28/2/196 */
 /* > */
 /* > \endverbatim */

 /*  ===================================================================== */
 /* Subroutine */ int chbev_2stage_(char *jobz, char *uplo, integer *n, 
 	integer *kd, complex *ab, integer *ldab, real *w, complex *z__, 
 	integer *ldz, complex *work, integer *lwork, real *rwork, integer *
 	info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, z_dim1, z_offset, i__1;
    real r__1;

    /* Local variables */
    extern /* Subroutine */ int chetrd_hb2st_(char *, char *, char *, 
 	    integer *, integer *, complex *, integer *, real *, real *, 
 	    complex *, integer *, complex *, integer *, integer *);
    integer inde;
    extern integer ilaenv2stage_(integer *, char *, char *, integer *, 
 	    integer *, integer *, integer *);
    real anrm;
    integer imax;
    real rmin, rmax, sigma;
    extern logical lsame_(char *, char *);
    integer iinfo;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    integer lhtrd, lwmin;
    logical lower;
    integer lwtrd;
    logical wantz;
    integer ib;
    extern real clanhb_(char *, char *, integer *, integer *, complex *, 
 	    integer *, real *);
    integer iscale;
    extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, 
 	    real *, integer *, integer *, complex *, integer *, integer *);
    extern real slamch_(char *);
    real safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    real bignum;
    integer indwrk, indrwk;
    extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, 
 	    complex *, integer *, real *, integer *), ssterf_(integer 
 	    *, real *, real *, integer *);
    integer llwork;
    real smlnum;
    logical lquery;
    real eps;
    integer indhous;



 /*  -- LAPACK driver routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    lower = lsame_(uplo, "L");
    lquery = *lwork == -1;

    *info = 0;
    if (! lsame_(jobz, "N")) {
 	*info = -1;
    } else if (! (lower || lsame_(uplo, "U"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*kd < 0) {
 	*info = -4;
    } else if (*ldab < *kd + 1) {
 	*info = -6;
    } else if (*ldz < 1 || wantz && *ldz < *n) {
 	*info = -9;
    }

    if (*info == 0) {
 	if (*n <= 1) {
 	    lwmin = 1;
 	    work[1].r = (real) lwmin, work[1].i = 0.f;
 	} else {
 	    ib = ilaenv2stage_(&c__2, "CHETRD_HB2ST", jobz, n, kd, &c_n1, &
 		    c_n1);
 	    lhtrd = ilaenv2stage_(&c__3, "CHETRD_HB2ST", jobz, n, kd, &ib, &
 		    c_n1);
 	    lwtrd = ilaenv2stage_(&c__4, "CHETRD_HB2ST", jobz, n, kd, &ib, &
 		    c_n1);
 	    lwmin = lhtrd + lwtrd;
 	    work[1].r = (real) lwmin, work[1].i = 0.f;
 	}

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -11;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHBEV_2STAGE ", &i__1, (ftnlen)13);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

    if (*n == 1) {
 	if (lower) {
 	    i__1 = ab_dim1 + 1;
 	    w[1] = ab[i__1].r;
 	} else {
 	    i__1 = *kd + 1 + ab_dim1;
 	    w[1] = ab[i__1].r;
 	}
 	if (wantz) {
 	    i__1 = z_dim1 + 1;
 	    z__[i__1].r = 1.f, z__[i__1].i = 0.f;
 	}
 	return 0;
    }

 /*     Get machine constants. */

    safmin = slamch_("Safe minimum");
    eps = slamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1.f / smlnum;
    rmin = sqrt(smlnum);
    rmax = sqrt(bignum);

 /*     Scale matrix to allowable range, if necessary. */

    anrm = clanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]);
    iscale = 0;
    if (anrm > 0.f && anrm < rmin) {
 	iscale = 1;
 	sigma = rmin / anrm;
    } else if (anrm > rmax) {
 	iscale = 1;
 	sigma = rmax / anrm;
    }
    if (iscale == 1) {
 	if (lower) {
 	    clascl_("B", kd, kd, &c_b21, &sigma, n, n, &ab[ab_offset], ldab, 
 		    info);
 	} else {
 	    clascl_("Q", kd, kd, &c_b21, &sigma, n, n, &ab[ab_offset], ldab, 
 		    info);
 	}
    }

 /*     Call CHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. */

    inde = 1;
    indhous = 1;
    indwrk = indhous + lhtrd;
    llwork = *lwork - indwrk + 1;

    chetrd_hb2st_("N", jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &
 	    rwork[inde], &work[indhous], &lhtrd, &work[indwrk], &llwork, &
 	    iinfo);

 /*     For eigenvalues only, call SSTERF.  For eigenvectors, call CSTEQR. */

    if (! wantz) {
 	ssterf_(n, &w[1], &rwork[inde], info);
    } else {
 	indrwk = inde + *n;
 	csteqr_(jobz, n, &w[1], &rwork[inde], &z__[z_offset], ldz, &rwork[
 		indrwk], info);
    }

 /*     If matrix was scaled, then rescale eigenvalues appropriately. */

    if (iscale == 1) {
 	if (*info == 0) {
 	    imax = *n;
 	} else {
 	    imax = *info - 1;
 	}
 	r__1 = 1.f / sigma;
 	sscal_(&imax, &r__1, &w[1], &c__1);
    }

 /*     Set WORK(1) to optimal workspace size. */

    work[1].r = (real) lwmin, work[1].i = 0.f;

    return 0;

 /*     End of CHBEV_2STAGE */

 } /* chbev_2stage__ */

--- a/lapack-netlib/SRC/chbevd.c
+++ b/lapack-netlib/SRC/chbevd.c
@@ -0,0 +1,832 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static complex c_b1 = {0.f,0.f};
 static complex c_b2 = {1.f,0.f};
 static real c_b13 = 1.f;
 static integer c__1 = 1;

 /* > \brief <b> CHBEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER 
 matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHBEVD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chbevd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chbevd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chbevd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, */
 /*                          LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N */
 /*       INTEGER            IWORK( * ) */
 /*       REAL               RWORK( * ), W( * ) */
 /*       COMPLEX            AB( LDAB, * ), WORK( * ), Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHBEVD computes all the eigenvalues and, optionally, eigenvectors of */
 /* > a complex Hermitian band matrix A.  If eigenvectors are desired, it */
 /* > uses a divide and conquer algorithm. */
 /* > */
 /* > The divide and conquer algorithm makes very mild assumptions about */
 /* > floating point arithmetic. It will work on machines with a guard */
 /* > digit in add/subtract, or on those binary machines without guard */
 /* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
 /* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */
 /* > without guard digits, but we know of none. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KD */
 /* > \verbatim */
 /* >          KD is INTEGER */
 /* >          The number of superdiagonals of the matrix A if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'.  KD >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AB */
 /* > \verbatim */
 /* >          AB is COMPLEX array, dimension (LDAB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix A, stored in the first KD+1 rows of the array.  The */
 /* >          j-th column of A is stored in the j-th column of the array AB */
 /* >          as follows: */
 /* >          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */
 /* >          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=f2cmin(n,j+kd). */
 /* > */
 /* >          On exit, AB is overwritten by values generated during the */
 /* >          reduction to tridiagonal form.  If UPLO = 'U', the first */
 /* >          superdiagonal and the diagonal of the tridiagonal matrix T */
 /* >          are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */
 /* >          the diagonal and first subdiagonal of T are returned in the */
 /* >          first two rows of AB. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KD + 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is REAL array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX array, dimension (LDZ, N) */
 /* >          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */
 /* >          eigenvectors of the matrix A, with the i-th column of Z */
 /* >          holding the eigenvector associated with W(i). */
 /* >          If JOBZ = 'N', then Z is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z.  LDZ >= 1, and if */
 /* >          JOBZ = 'V', LDZ >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If N <= 1,               LWORK must be at least 1. */
 /* >          If JOBZ = 'N' and N > 1, LWORK must be at least N. */
 /* >          If JOBZ = 'V' and N > 1, LWORK must be at least 2*N**2. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal sizes of the WORK, RWORK and */
 /* >          IWORK arrays, returns these values as the first entries of */
 /* >          the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, */
 /* >                                         dimension (LRWORK) */
 /* >          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LRWORK */
 /* > \verbatim */
 /* >          LRWORK is INTEGER */
 /* >          The dimension of array RWORK. */
 /* >          If N <= 1,               LRWORK must be at least 1. */
 /* >          If JOBZ = 'N' and N > 1, LRWORK must be at least N. */
 /* >          If JOBZ = 'V' and N > 1, LRWORK must be at least */
 /* >                        1 + 5*N + 2*N**2. */
 /* > */
 /* >          If LRWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */
 /* >          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LIWORK */
 /* > \verbatim */
 /* >          LIWORK is INTEGER */
 /* >          The dimension of array IWORK. */
 /* >          If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. */
 /* >          If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . */
 /* > */
 /* >          If LIWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          > 0:  if INFO = i, the algorithm failed to converge; i */
 /* >                off-diagonal elements of an intermediate tridiagonal */
 /* >                form did not converge to zero. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexOTHEReigen */

 /*  ===================================================================== */
 /* Subroutine */ int chbevd_(char *jobz, char *uplo, integer *n, integer *kd, 
 	complex *ab, integer *ldab, real *w, complex *z__, integer *ldz, 
 	complex *work, integer *lwork, real *rwork, integer *lrwork, integer *
 	iwork, integer *liwork, integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, z_dim1, z_offset, i__1;
    real r__1;

    /* Local variables */
    integer inde;
    real anrm;
    integer imax;
    real rmin, rmax;
    integer llwk2;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
 	    integer *, complex *, complex *, integer *, complex *, integer *, 
 	    complex *, complex *, integer *);
    real sigma;
    extern logical lsame_(char *, char *);
    integer iinfo;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    integer lwmin;
    logical lower;
    integer llrwk;
    logical wantz;
    integer indwk2;
    extern real clanhb_(char *, char *, integer *, integer *, complex *, 
 	    integer *, real *);
    integer iscale;
    extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, 
 	    real *, integer *, integer *, complex *, integer *, integer *), cstedc_(char *, integer *, real *, real *, complex *, 
 	    integer *, complex *, integer *, real *, integer *, integer *, 
 	    integer *, integer *), chbtrd_(char *, char *, integer *, 
 	    integer *, complex *, integer *, real *, real *, complex *, 
 	    integer *, complex *, integer *);
    extern real slamch_(char *);
    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex 
 	    *, integer *, complex *, integer *);
    real safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    real bignum;
    integer indwrk, liwmin;
    extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *);
    integer lrwmin;
    real smlnum;
    logical lquery;
    real eps;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    lower = lsame_(uplo, "L");
    lquery = *lwork == -1 || *liwork == -1 || *lrwork == -1;

    *info = 0;
    if (*n <= 1) {
 	lwmin = 1;
 	lrwmin = 1;
 	liwmin = 1;
    } else {
 	if (wantz) {
 /* Computing 2nd power */
 	    i__1 = *n;
 	    lwmin = i__1 * i__1 << 1;
 /* Computing 2nd power */
 	    i__1 = *n;
 	    lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1);
 	    liwmin = *n * 5 + 3;
 	} else {
 	    lwmin = *n;
 	    lrwmin = *n;
 	    liwmin = 1;
 	}
    }
    if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -1;
    } else if (! (lower || lsame_(uplo, "U"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*kd < 0) {
 	*info = -4;
    } else if (*ldab < *kd + 1) {
 	*info = -6;
    } else if (*ldz < 1 || wantz && *ldz < *n) {
 	*info = -9;
    }

    if (*info == 0) {
 	work[1].r = (real) lwmin, work[1].i = 0.f;
 	rwork[1] = (real) lrwmin;
 	iwork[1] = liwmin;

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -11;
 	} else if (*lrwork < lrwmin && ! lquery) {
 	    *info = -13;
 	} else if (*liwork < liwmin && ! lquery) {
 	    *info = -15;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHBEVD", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

    if (*n == 1) {
 	i__1 = ab_dim1 + 1;
 	w[1] = ab[i__1].r;
 	if (wantz) {
 	    i__1 = z_dim1 + 1;
 	    z__[i__1].r = 1.f, z__[i__1].i = 0.f;
 	}
 	return 0;
    }

 /*     Get machine constants. */

    safmin = slamch_("Safe minimum");
    eps = slamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1.f / smlnum;
    rmin = sqrt(smlnum);
    rmax = sqrt(bignum);

 /*     Scale matrix to allowable range, if necessary. */

    anrm = clanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]);
    iscale = 0;
    if (anrm > 0.f && anrm < rmin) {
 	iscale = 1;
 	sigma = rmin / anrm;
    } else if (anrm > rmax) {
 	iscale = 1;
 	sigma = rmax / anrm;
    }
    if (iscale == 1) {
 	if (lower) {
 	    clascl_("B", kd, kd, &c_b13, &sigma, n, n, &ab[ab_offset], ldab, 
 		    info);
 	} else {
 	    clascl_("Q", kd, kd, &c_b13, &sigma, n, n, &ab[ab_offset], ldab, 
 		    info);
 	}
    }

 /*     Call CHBTRD to reduce Hermitian band matrix to tridiagonal form. */

    inde = 1;
    indwrk = inde + *n;
    indwk2 = *n * *n + 1;
    llwk2 = *lwork - indwk2 + 1;
    llrwk = *lrwork - indwrk + 1;
    chbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &rwork[inde], &
 	    z__[z_offset], ldz, &work[1], &iinfo);

 /*     For eigenvalues only, call SSTERF.  For eigenvectors, call CSTEDC. */

    if (! wantz) {
 	ssterf_(n, &w[1], &rwork[inde], info);
    } else {
 	cstedc_("I", n, &w[1], &rwork[inde], &work[1], n, &work[indwk2], &
 		llwk2, &rwork[indwrk], &llrwk, &iwork[1], liwork, info);
 	cgemm_("N", "N", n, n, n, &c_b2, &z__[z_offset], ldz, &work[1], n, &
 		c_b1, &work[indwk2], n);
 	clacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz);
    }

 /*     If matrix was scaled, then rescale eigenvalues appropriately. */

    if (iscale == 1) {
 	if (*info == 0) {
 	    imax = *n;
 	} else {
 	    imax = *info - 1;
 	}
 	r__1 = 1.f / sigma;
 	sscal_(&imax, &r__1, &w[1], &c__1);
    }

    work[1].r = (real) lwmin, work[1].i = 0.f;
    rwork[1] = (real) lrwmin;
    iwork[1] = liwmin;
    return 0;

 /*     End of CHBEVD */

 } /* chbevd_ */

--- a/lapack-netlib/SRC/chbevd_2stage.c
+++ b/lapack-netlib/SRC/chbevd_2stage.c
@@ -0,0 +1,895 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static complex c_b1 = {0.f,0.f};
 static complex c_b2 = {1.f,0.f};
 static integer c__2 = 2;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__4 = 4;
 static real c_b23 = 1.f;
 static integer c__1 = 1;

 /* > \brief <b> CHBEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for
 OTHER matrices</b> */

 /*  @generated from zhbevd_2stage.f, fortran z -> c, Sat Nov  5 23:18:17 2016 */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHBEVD_2STAGE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chbevd_
 2stage.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chbevd_
 2stage.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chbevd_
 2stage.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, */
 /*                                 WORK, LWORK, RWORK, LRWORK, IWORK, */
 /*                                 LIWORK, INFO ) */

 /*       IMPLICIT NONE */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N */
 /*       INTEGER            IWORK( * ) */
 /*       REAL               RWORK( * ), W( * ) */
 /*       COMPLEX            AB( LDAB, * ), WORK( * ), Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHBEVD_2STAGE computes all the eigenvalues and, optionally, eigenvectors of */
 /* > a complex Hermitian band matrix A using the 2stage technique for */
 /* > the reduction to tridiagonal.  If eigenvectors are desired, it */
 /* > uses a divide and conquer algorithm. */
 /* > */
 /* > The divide and conquer algorithm makes very mild assumptions about */
 /* > floating point arithmetic. It will work on machines with a guard */
 /* > digit in add/subtract, or on those binary machines without guard */
 /* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
 /* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */
 /* > without guard digits, but we know of none. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* >                  Not available in this release. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KD */
 /* > \verbatim */
 /* >          KD is INTEGER */
 /* >          The number of superdiagonals of the matrix A if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'.  KD >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AB */
 /* > \verbatim */
 /* >          AB is COMPLEX array, dimension (LDAB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix A, stored in the first KD+1 rows of the array.  The */
 /* >          j-th column of A is stored in the j-th column of the array AB */
 /* >          as follows: */
 /* >          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */
 /* >          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=f2cmin(n,j+kd). */
 /* > */
 /* >          On exit, AB is overwritten by values generated during the */
 /* >          reduction to tridiagonal form.  If UPLO = 'U', the first */
 /* >          superdiagonal and the diagonal of the tridiagonal matrix T */
 /* >          are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */
 /* >          the diagonal and first subdiagonal of T are returned in the */
 /* >          first two rows of AB. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KD + 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is REAL array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX array, dimension (LDZ, N) */
 /* >          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */
 /* >          eigenvectors of the matrix A, with the i-th column of Z */
 /* >          holding the eigenvector associated with W(i). */
 /* >          If JOBZ = 'N', then Z is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z.  LDZ >= 1, and if */
 /* >          JOBZ = 'V', LDZ >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK. LWORK >= 1, when N <= 1; */
 /* >          otherwise */
 /* >          If JOBZ = 'N' and N > 1, LWORK must be queried. */
 /* >                                   LWORK = MAX(1, dimension) where */
 /* >                                   dimension = (2KD+1)*N + KD*NTHREADS */
 /* >                                   where KD is the size of the band. */
 /* >                                   NTHREADS is the number of threads used when */
 /* >                                   openMP compilation is enabled, otherwise =1. */
 /* >          If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal sizes of the WORK, RWORK and */
 /* >          IWORK arrays, returns these values as the first entries of */
 /* >          the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, */
 /* >                                         dimension (LRWORK) */
 /* >          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LRWORK */
 /* > \verbatim */
 /* >          LRWORK is INTEGER */
 /* >          The dimension of array RWORK. */
 /* >          If N <= 1,               LRWORK must be at least 1. */
 /* >          If JOBZ = 'N' and N > 1, LRWORK must be at least N. */
 /* >          If JOBZ = 'V' and N > 1, LRWORK must be at least */
 /* >                        1 + 5*N + 2*N**2. */
 /* > */
 /* >          If LRWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */
 /* >          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LIWORK */
 /* > \verbatim */
 /* >          LIWORK is INTEGER */
 /* >          The dimension of array IWORK. */
 /* >          If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. */
 /* >          If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . */
 /* > */
 /* >          If LIWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          > 0:  if INFO = i, the algorithm failed to converge; i */
 /* >                off-diagonal elements of an intermediate tridiagonal */
 /* >                form did not converge to zero. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complexOTHEReigen */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  All details about the 2stage techniques are available in: */
 /* > */
 /* >  Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */
 /* >  Parallel reduction to condensed forms for symmetric eigenvalue problems */
 /* >  using aggregated fine-grained and memory-aware kernels. In Proceedings */
 /* >  of 2011 International Conference for High Performance Computing, */
 /* >  Networking, Storage and Analysis (SC '11), New York, NY, USA, */
 /* >  Article 8 , 11 pages. */
 /* >  http://doi.acm.org/10.1145/2063384.2063394 */
 /* > */
 /* >  A. Haidar, J. Kurzak, P. Luszczek, 2013. */
 /* >  An improved parallel singular value algorithm and its implementation */
 /* >  for multicore hardware, In Proceedings of 2013 International Conference */
 /* >  for High Performance Computing, Networking, Storage and Analysis (SC '13). */
 /* >  Denver, Colorado, USA, 2013. */
 /* >  Article 90, 12 pages. */
 /* >  http://doi.acm.org/10.1145/2503210.2503292 */
 /* > */
 /* >  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */
 /* >  A novel hybrid CPU-GPU generalized eigensolver for electronic structure */
 /* >  calculations based on fine-grained memory aware tasks. */
 /* >  International Journal of High Performance Computing Applications. */
 /* >  Volume 28 Issue 2, Pages 196-209, May 2014. */
 /* >  http://hpc.sagepub.com/content/28/2/196 */
 /* > */
 /* > \endverbatim */

 /*  ===================================================================== */
 /* Subroutine */ int chbevd_2stage_(char *jobz, char *uplo, integer *n, 
 	integer *kd, complex *ab, integer *ldab, real *w, complex *z__, 
 	integer *ldz, complex *work, integer *lwork, real *rwork, integer *
 	lrwork, integer *iwork, integer *liwork, integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, z_dim1, z_offset, i__1, i__2;
    real r__1;

    /* Local variables */
    extern /* Subroutine */ int chetrd_hb2st_(char *, char *, char *, 
 	    integer *, integer *, complex *, integer *, real *, real *, 
 	    complex *, integer *, complex *, integer *, integer *);
    integer inde;
    extern integer ilaenv2stage_(integer *, char *, char *, integer *, 
 	    integer *, integer *, integer *);
    real anrm;
    integer imax;
    real rmin, rmax;
    integer llwk2;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
 	    integer *, complex *, complex *, integer *, complex *, integer *, 
 	    complex *, complex *, integer *);
    real sigma;
    extern logical lsame_(char *, char *);
    integer iinfo;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    integer indwk, lhtrd, lwmin;
    logical lower;
    integer lwtrd, llrwk;
    logical wantz;
    integer indwk2, ib;
    extern real clanhb_(char *, char *, integer *, integer *, complex *, 
 	    integer *, real *);
    integer iscale;
    extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, 
 	    real *, integer *, integer *, complex *, integer *, integer *), cstedc_(char *, integer *, real *, real *, complex *, 
 	    integer *, complex *, integer *, real *, integer *, integer *, 
 	    integer *, integer *);
    extern real slamch_(char *);
    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex 
 	    *, integer *, complex *, integer *);
    real safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    real bignum;
    integer indrwk, liwmin;
    extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *);
    integer lrwmin, llwork;
    real smlnum;
    logical lquery;
    real eps;
    integer indhous;



 /*  -- LAPACK driver routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    lower = lsame_(uplo, "L");
    lquery = *lwork == -1 || *liwork == -1 || *lrwork == -1;

    *info = 0;
    if (*n <= 1) {
 	lwmin = 1;
 	lrwmin = 1;
 	liwmin = 1;
    } else {
 	ib = ilaenv2stage_(&c__2, "CHETRD_HB2ST", jobz, n, kd, &c_n1, &c_n1);
 	lhtrd = ilaenv2stage_(&c__3, "CHETRD_HB2ST", jobz, n, kd, &ib, &c_n1);
 	lwtrd = ilaenv2stage_(&c__4, "CHETRD_HB2ST", jobz, n, kd, &ib, &c_n1);
 	if (wantz) {
 /* Computing 2nd power */
 	    i__1 = *n;
 	    lwmin = i__1 * i__1 << 1;
 /* Computing 2nd power */
 	    i__1 = *n;
 	    lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1);
 	    liwmin = *n * 5 + 3;
 	} else {
 /* Computing MAX */
 	    i__1 = *n, i__2 = lhtrd + lwtrd;
 	    lwmin = f2cmax(i__1,i__2);
 	    lrwmin = *n;
 	    liwmin = 1;
 	}
    }
    if (! lsame_(jobz, "N")) {
 	*info = -1;
    } else if (! (lower || lsame_(uplo, "U"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*kd < 0) {
 	*info = -4;
    } else if (*ldab < *kd + 1) {
 	*info = -6;
    } else if (*ldz < 1 || wantz && *ldz < *n) {
 	*info = -9;
    }

    if (*info == 0) {
 	work[1].r = (real) lwmin, work[1].i = 0.f;
 	rwork[1] = (real) lrwmin;
 	iwork[1] = liwmin;

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -11;
 	} else if (*lrwork < lrwmin && ! lquery) {
 	    *info = -13;
 	} else if (*liwork < liwmin && ! lquery) {
 	    *info = -15;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHBEVD_2STAGE", &i__1, (ftnlen)13);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

    if (*n == 1) {
 	i__1 = ab_dim1 + 1;
 	w[1] = ab[i__1].r;
 	if (wantz) {
 	    i__1 = z_dim1 + 1;
 	    z__[i__1].r = 1.f, z__[i__1].i = 0.f;
 	}
 	return 0;
    }

 /*     Get machine constants. */

    safmin = slamch_("Safe minimum");
    eps = slamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1.f / smlnum;
    rmin = sqrt(smlnum);
    rmax = sqrt(bignum);

 /*     Scale matrix to allowable range, if necessary. */

    anrm = clanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]);
    iscale = 0;
    if (anrm > 0.f && anrm < rmin) {
 	iscale = 1;
 	sigma = rmin / anrm;
    } else if (anrm > rmax) {
 	iscale = 1;
 	sigma = rmax / anrm;
    }
    if (iscale == 1) {
 	if (lower) {
 	    clascl_("B", kd, kd, &c_b23, &sigma, n, n, &ab[ab_offset], ldab, 
 		    info);
 	} else {
 	    clascl_("Q", kd, kd, &c_b23, &sigma, n, n, &ab[ab_offset], ldab, 
 		    info);
 	}
    }

 /*     Call CHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. */

    inde = 1;
    indrwk = inde + *n;
    llrwk = *lrwork - indrwk + 1;
    indhous = 1;
    indwk = indhous + lhtrd;
    llwork = *lwork - indwk + 1;
    indwk2 = indwk + *n * *n;
    llwk2 = *lwork - indwk2 + 1;

    chetrd_hb2st_("N", jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &
 	    rwork[inde], &work[indhous], &lhtrd, &work[indwk], &llwork, &
 	    iinfo);

 /*     For eigenvalues only, call SSTERF.  For eigenvectors, call CSTEDC. */

    if (! wantz) {
 	ssterf_(n, &w[1], &rwork[inde], info);
    } else {
 	cstedc_("I", n, &w[1], &rwork[inde], &work[1], n, &work[indwk2], &
 		llwk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info);
 	cgemm_("N", "N", n, n, n, &c_b2, &z__[z_offset], ldz, &work[1], n, &
 		c_b1, &work[indwk2], n);
 	clacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz);
    }

 /*     If matrix was scaled, then rescale eigenvalues appropriately. */

    if (iscale == 1) {
 	if (*info == 0) {
 	    imax = *n;
 	} else {
 	    imax = *info - 1;
 	}
 	r__1 = 1.f / sigma;
 	sscal_(&imax, &r__1, &w[1], &c__1);
    }

    work[1].r = (real) lwmin, work[1].i = 0.f;
    rwork[1] = (real) lrwmin;
    iwork[1] = liwmin;
    return 0;

 /*     End of CHBEVD_2STAGE */

 } /* chbevd_2stage__ */

--- a/lapack-netlib/SRC/chbevx.c
+++ b/lapack-netlib/SRC/chbevx.c
--- a/lapack-netlib/SRC/chbevx_2stage.c
+++ b/lapack-netlib/SRC/chbevx_2stage.c
--- a/lapack-netlib/SRC/chbgst.c
+++ b/lapack-netlib/SRC/chbgst.c
--- a/lapack-netlib/SRC/chbgv.c
+++ b/lapack-netlib/SRC/chbgv.c
@@ -0,0 +1,693 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b CHBGV */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHBGV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chbgv.f
 "> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chbgv.f
 "> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chbgv.f
 "> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, */
 /*                         LDZ, WORK, RWORK, INFO ) */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, KA, KB, LDAB, LDBB, LDZ, N */
 /*       REAL               RWORK( * ), W( * ) */
 /*       COMPLEX            AB( LDAB, * ), BB( LDBB, * ), WORK( * ), */
 /*      $                   Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHBGV computes all the eigenvalues, and optionally, the eigenvectors */
 /* > of a complex generalized Hermitian-definite banded eigenproblem, of */
 /* > the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian */
 /* > and banded, and B is also positive definite. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangles of A and B are stored; */
 /* >          = 'L':  Lower triangles of A and B are stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KA */
 /* > \verbatim */
 /* >          KA is INTEGER */
 /* >          The number of superdiagonals of the matrix A if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'. KA >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KB */
 /* > \verbatim */
 /* >          KB is INTEGER */
 /* >          The number of superdiagonals of the matrix B if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'. KB >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AB */
 /* > \verbatim */
 /* >          AB is COMPLEX array, dimension (LDAB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix A, stored in the first ka+1 rows of the array.  The */
 /* >          j-th column of A is stored in the j-th column of the array AB */
 /* >          as follows: */
 /* >          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for f2cmax(1,j-ka)<=i<=j; */
 /* >          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=f2cmin(n,j+ka). */
 /* > */
 /* >          On exit, the contents of AB are destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KA+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] BB */
 /* > \verbatim */
 /* >          BB is COMPLEX array, dimension (LDBB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix B, stored in the first kb+1 rows of the array.  The */
 /* >          j-th column of B is stored in the j-th column of the array BB */
 /* >          as follows: */
 /* >          if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for f2cmax(1,j-kb)<=i<=j; */
 /* >          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=f2cmin(n,j+kb). */
 /* > */
 /* >          On exit, the factor S from the split Cholesky factorization */
 /* >          B = S**H*S, as returned by CPBSTF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDBB */
 /* > \verbatim */
 /* >          LDBB is INTEGER */
 /* >          The leading dimension of the array BB.  LDBB >= KB+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is REAL array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX array, dimension (LDZ, N) */
 /* >          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */
 /* >          eigenvectors, with the i-th column of Z holding the */
 /* >          eigenvector associated with W(i). The eigenvectors are */
 /* >          normalized so that Z**H*B*Z = I. */
 /* >          If JOBZ = 'N', then Z is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z.  LDZ >= 1, and if */
 /* >          JOBZ = 'V', LDZ >= N. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, dimension (3*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, and i is: */
 /* >             <= N:  the algorithm failed to converge: */
 /* >                    i off-diagonal elements of an intermediate */
 /* >                    tridiagonal form did not converge to zero; */
 /* >             > N:   if INFO = N + i, for 1 <= i <= N, then CPBSTF */
 /* >                    returned INFO = i: B is not positive definite. */
 /* >                    The factorization of B could not be completed and */
 /* >                    no eigenvalues or eigenvectors were computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexOTHEReigen */

 /*  ===================================================================== */
 /* Subroutine */ int chbgv_(char *jobz, char *uplo, integer *n, integer *ka, 
 	integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, 
 	real *w, complex *z__, integer *ldz, complex *work, real *rwork, 
 	integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, bb_dim1, bb_offset, z_dim1, z_offset, i__1;

    /* Local variables */
    integer inde;
    char vect[1];
    extern logical lsame_(char *, char *);
    integer iinfo;
    logical upper, wantz;
    extern /* Subroutine */ int chbtrd_(char *, char *, integer *, integer *, 
 	    complex *, integer *, real *, real *, complex *, integer *, 
 	    complex *, integer *), chbgst_(char *, char *, 
 	    integer *, integer *, integer *, complex *, integer *, complex *, 
 	    integer *, complex *, integer *, complex *, real *, integer *), xerbla_(char *, integer *, ftnlen), cpbstf_(char 
 	    *, integer *, integer *, complex *, integer *, integer *);
    integer indwrk;
    extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, 
 	    complex *, integer *, real *, integer *), ssterf_(integer 
 	    *, real *, real *, integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    bb_dim1 = *ldbb;
    bb_offset = 1 + bb_dim1 * 1;
    bb -= bb_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");

    *info = 0;
    if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -1;
    } else if (! (upper || lsame_(uplo, "L"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*ka < 0) {
 	*info = -4;
    } else if (*kb < 0 || *kb > *ka) {
 	*info = -5;
    } else if (*ldab < *ka + 1) {
 	*info = -7;
    } else if (*ldbb < *kb + 1) {
 	*info = -9;
    } else if (*ldz < 1 || wantz && *ldz < *n) {
 	*info = -12;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHBGV ", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Form a split Cholesky factorization of B. */

    cpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info);
    if (*info != 0) {
 	*info = *n + *info;
 	return 0;
    }

 /*     Transform problem to standard eigenvalue problem. */

    inde = 1;
    indwrk = inde + *n;
    chbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb,
 	     &z__[z_offset], ldz, &work[1], &rwork[indwrk], &iinfo);

 /*     Reduce to tridiagonal form. */

    if (wantz) {
 	*(unsigned char *)vect = 'U';
    } else {
 	*(unsigned char *)vect = 'N';
    }
    chbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &w[1], &rwork[inde], &
 	    z__[z_offset], ldz, &work[1], &iinfo);

 /*     For eigenvalues only, call SSTERF.  For eigenvectors, call CSTEQR. */

    if (! wantz) {
 	ssterf_(n, &w[1], &rwork[inde], info);
    } else {
 	csteqr_(jobz, n, &w[1], &rwork[inde], &z__[z_offset], ldz, &rwork[
 		indwrk], info);
    }
    return 0;

 /*     End of CHBGV */

 } /* chbgv_ */

--- a/lapack-netlib/SRC/chbgvd.c
+++ b/lapack-netlib/SRC/chbgvd.c
@@ -0,0 +1,825 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static complex c_b1 = {1.f,0.f};
 static complex c_b2 = {0.f,0.f};

 /* > \brief \b CHBGVD */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHBGVD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chbgvd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chbgvd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chbgvd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, */
 /*                          Z, LDZ, WORK, LWORK, RWORK, LRWORK, IWORK, */
 /*                          LIWORK, INFO ) */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LRWORK, */
 /*      $                   LWORK, N */
 /*       INTEGER            IWORK( * ) */
 /*       REAL               RWORK( * ), W( * ) */
 /*       COMPLEX            AB( LDAB, * ), BB( LDBB, * ), WORK( * ), */
 /*      $                   Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHBGVD computes all the eigenvalues, and optionally, the eigenvectors */
 /* > of a complex generalized Hermitian-definite banded eigenproblem, of */
 /* > the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian */
 /* > and banded, and B is also positive definite.  If eigenvectors are */
 /* > desired, it uses a divide and conquer algorithm. */
 /* > */
 /* > The divide and conquer algorithm makes very mild assumptions about */
 /* > floating point arithmetic. It will work on machines with a guard */
 /* > digit in add/subtract, or on those binary machines without guard */
 /* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
 /* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */
 /* > without guard digits, but we know of none. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangles of A and B are stored; */
 /* >          = 'L':  Lower triangles of A and B are stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KA */
 /* > \verbatim */
 /* >          KA is INTEGER */
 /* >          The number of superdiagonals of the matrix A if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'. KA >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KB */
 /* > \verbatim */
 /* >          KB is INTEGER */
 /* >          The number of superdiagonals of the matrix B if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'. KB >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AB */
 /* > \verbatim */
 /* >          AB is COMPLEX array, dimension (LDAB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix A, stored in the first ka+1 rows of the array.  The */
 /* >          j-th column of A is stored in the j-th column of the array AB */
 /* >          as follows: */
 /* >          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for f2cmax(1,j-ka)<=i<=j; */
 /* >          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=f2cmin(n,j+ka). */
 /* > */
 /* >          On exit, the contents of AB are destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KA+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] BB */
 /* > \verbatim */
 /* >          BB is COMPLEX array, dimension (LDBB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix B, stored in the first kb+1 rows of the array.  The */
 /* >          j-th column of B is stored in the j-th column of the array BB */
 /* >          as follows: */
 /* >          if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for f2cmax(1,j-kb)<=i<=j; */
 /* >          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=f2cmin(n,j+kb). */
 /* > */
 /* >          On exit, the factor S from the split Cholesky factorization */
 /* >          B = S**H*S, as returned by CPBSTF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDBB */
 /* > \verbatim */
 /* >          LDBB is INTEGER */
 /* >          The leading dimension of the array BB.  LDBB >= KB+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is REAL array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX array, dimension (LDZ, N) */
 /* >          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */
 /* >          eigenvectors, with the i-th column of Z holding the */
 /* >          eigenvector associated with W(i). The eigenvectors are */
 /* >          normalized so that Z**H*B*Z = I. */
 /* >          If JOBZ = 'N', then Z is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z.  LDZ >= 1, and if */
 /* >          JOBZ = 'V', LDZ >= N. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO=0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If N <= 1,               LWORK >= 1. */
 /* >          If JOBZ = 'N' and N > 1, LWORK >= N. */
 /* >          If JOBZ = 'V' and N > 1, LWORK >= 2*N**2. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal sizes of the WORK, RWORK and */
 /* >          IWORK arrays, returns these values as the first entries of */
 /* >          the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, dimension (MAX(1,LRWORK)) */
 /* >          On exit, if INFO=0, RWORK(1) returns the optimal LRWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LRWORK */
 /* > \verbatim */
 /* >          LRWORK is INTEGER */
 /* >          The dimension of array RWORK. */
 /* >          If N <= 1,               LRWORK >= 1. */
 /* >          If JOBZ = 'N' and N > 1, LRWORK >= N. */
 /* >          If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. */
 /* > */
 /* >          If LRWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */
 /* >          On exit, if INFO=0, IWORK(1) returns the optimal LIWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LIWORK */
 /* > \verbatim */
 /* >          LIWORK is INTEGER */
 /* >          The dimension of array IWORK. */
 /* >          If JOBZ = 'N' or N <= 1, LIWORK >= 1. */
 /* >          If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. */
 /* > */
 /* >          If LIWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, and i is: */
 /* >             <= N:  the algorithm failed to converge: */
 /* >                    i off-diagonal elements of an intermediate */
 /* >                    tridiagonal form did not converge to zero; */
 /* >             > N:   if INFO = N + i, for 1 <= i <= N, then CPBSTF */
 /* >                    returned INFO = i: B is not positive definite. */
 /* >                    The factorization of B could not be completed and */
 /* >                    no eigenvalues or eigenvectors were computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2016 */

 /* > \ingroup complexOTHEReigen */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */

 /*  ===================================================================== */
 /* Subroutine */ int chbgvd_(char *jobz, char *uplo, integer *n, integer *ka, 
 	integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, 
 	real *w, complex *z__, integer *ldz, complex *work, integer *lwork, 
 	real *rwork, integer *lrwork, integer *iwork, integer *liwork, 
 	integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, bb_dim1, bb_offset, z_dim1, z_offset, i__1;

    /* Local variables */
    integer inde;
    char vect[1];
    integer llwk2;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
 	    integer *, complex *, complex *, integer *, complex *, integer *, 
 	    complex *, complex *, integer *);
    extern logical lsame_(char *, char *);
    integer iinfo, lwmin;
    logical upper;
    integer llrwk;
    logical wantz;
    integer indwk2;
    extern /* Subroutine */ int cstedc_(char *, integer *, real *, real *, 
 	    complex *, integer *, complex *, integer *, real *, integer *, 
 	    integer *, integer *, integer *), chbtrd_(char *, char *, 
 	    integer *, integer *, complex *, integer *, real *, real *, 
 	    complex *, integer *, complex *, integer *), 
 	    chbgst_(char *, char *, integer *, integer *, integer *, complex *
 	    , integer *, complex *, integer *, complex *, integer *, complex *
 	    , real *, integer *), clacpy_(char *, integer *, 
 	    integer *, complex *, integer *, complex *, integer *), 
 	    xerbla_(char *, integer *, ftnlen), cpbstf_(char *, integer *, 
 	    integer *, complex *, integer *, integer *);
    integer indwrk, liwmin;
    extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *);
    integer lrwmin;
    logical lquery;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    bb_dim1 = *ldbb;
    bb_offset = 1 + bb_dim1 * 1;
    bb -= bb_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1;

    *info = 0;
    if (*n <= 1) {
 	lwmin = *n + 1;
 	lrwmin = *n + 1;
 	liwmin = 1;
    } else if (wantz) {
 /* Computing 2nd power */
 	i__1 = *n;
 	lwmin = i__1 * i__1 << 1;
 /* Computing 2nd power */
 	i__1 = *n;
 	lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1);
 	liwmin = *n * 5 + 3;
    } else {
 	lwmin = *n;
 	lrwmin = *n;
 	liwmin = 1;
    }
    if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -1;
    } else if (! (upper || lsame_(uplo, "L"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*ka < 0) {
 	*info = -4;
    } else if (*kb < 0 || *kb > *ka) {
 	*info = -5;
    } else if (*ldab < *ka + 1) {
 	*info = -7;
    } else if (*ldbb < *kb + 1) {
 	*info = -9;
    } else if (*ldz < 1 || wantz && *ldz < *n) {
 	*info = -12;
    }

    if (*info == 0) {
 	work[1].r = (real) lwmin, work[1].i = 0.f;
 	rwork[1] = (real) lrwmin;
 	iwork[1] = liwmin;

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -14;
 	} else if (*lrwork < lrwmin && ! lquery) {
 	    *info = -16;
 	} else if (*liwork < liwmin && ! lquery) {
 	    *info = -18;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHBGVD", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Form a split Cholesky factorization of B. */

    cpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info);
    if (*info != 0) {
 	*info = *n + *info;
 	return 0;
    }

 /*     Transform problem to standard eigenvalue problem. */

    inde = 1;
    indwrk = inde + *n;
    indwk2 = *n * *n + 1;
    llwk2 = *lwork - indwk2 + 2;
    llrwk = *lrwork - indwrk + 2;
    chbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb,
 	     &z__[z_offset], ldz, &work[1], &rwork[1], &iinfo);

 /*     Reduce Hermitian band matrix to tridiagonal form. */

    if (wantz) {
 	*(unsigned char *)vect = 'U';
    } else {
 	*(unsigned char *)vect = 'N';
    }
    chbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &w[1], &rwork[inde], &
 	    z__[z_offset], ldz, &work[1], &iinfo);

 /*     For eigenvalues only, call SSTERF.  For eigenvectors, call CSTEDC. */

    if (! wantz) {
 	ssterf_(n, &w[1], &rwork[inde], info);
    } else {
 	cstedc_("I", n, &w[1], &rwork[inde], &work[1], n, &work[indwk2], &
 		llwk2, &rwork[indwrk], &llrwk, &iwork[1], liwork, info);
 	cgemm_("N", "N", n, n, n, &c_b1, &z__[z_offset], ldz, &work[1], n, &
 		c_b2, &work[indwk2], n);
 	clacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz);
    }

    work[1].r = (real) lwmin, work[1].i = 0.f;
    rwork[1] = (real) lrwmin;
    iwork[1] = liwmin;
    return 0;

 /*     End of CHBGVD */

 } /* chbgvd_ */

--- a/lapack-netlib/SRC/chbgvx.c
+++ b/lapack-netlib/SRC/chbgvx.c
@@ -0,0 +1,976 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static complex c_b1 = {0.f,0.f};
 static complex c_b2 = {1.f,0.f};
 static integer c__1 = 1;

 /* > \brief \b CHBGVX */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHBGVX + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chbgvx.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chbgvx.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chbgvx.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB, */
 /*                          LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, */
 /*                          LDZ, WORK, RWORK, IWORK, IFAIL, INFO ) */

 /*       CHARACTER          JOBZ, RANGE, UPLO */
 /*       INTEGER            IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M, */
 /*      $                   N */
 /*       REAL               ABSTOL, VL, VU */
 /*       INTEGER            IFAIL( * ), IWORK( * ) */
 /*       REAL               RWORK( * ), W( * ) */
 /*       COMPLEX            AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ), */
 /*      $                   WORK( * ), Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHBGVX computes all the eigenvalues, and optionally, the eigenvectors */
 /* > of a complex generalized Hermitian-definite banded eigenproblem, of */
 /* > the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian */
 /* > and banded, and B is also positive definite.  Eigenvalues and */
 /* > eigenvectors can be selected by specifying either all eigenvalues, */
 /* > a range of values or a range of indices for the desired eigenvalues. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] RANGE */
 /* > \verbatim */
 /* >          RANGE is CHARACTER*1 */
 /* >          = 'A': all eigenvalues will be found; */
 /* >          = 'V': all eigenvalues in the half-open interval (VL,VU] */
 /* >                 will be found; */
 /* >          = 'I': the IL-th through IU-th eigenvalues will be found. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangles of A and B are stored; */
 /* >          = 'L':  Lower triangles of A and B are stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KA */
 /* > \verbatim */
 /* >          KA is INTEGER */
 /* >          The number of superdiagonals of the matrix A if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'. KA >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KB */
 /* > \verbatim */
 /* >          KB is INTEGER */
 /* >          The number of superdiagonals of the matrix B if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'. KB >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AB */
 /* > \verbatim */
 /* >          AB is COMPLEX array, dimension (LDAB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix A, stored in the first ka+1 rows of the array.  The */
 /* >          j-th column of A is stored in the j-th column of the array AB */
 /* >          as follows: */
 /* >          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for f2cmax(1,j-ka)<=i<=j; */
 /* >          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=f2cmin(n,j+ka). */
 /* > */
 /* >          On exit, the contents of AB are destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KA+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] BB */
 /* > \verbatim */
 /* >          BB is COMPLEX array, dimension (LDBB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix B, stored in the first kb+1 rows of the array.  The */
 /* >          j-th column of B is stored in the j-th column of the array BB */
 /* >          as follows: */
 /* >          if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for f2cmax(1,j-kb)<=i<=j; */
 /* >          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=f2cmin(n,j+kb). */
 /* > */
 /* >          On exit, the factor S from the split Cholesky factorization */
 /* >          B = S**H*S, as returned by CPBSTF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDBB */
 /* > \verbatim */
 /* >          LDBB is INTEGER */
 /* >          The leading dimension of the array BB.  LDBB >= KB+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Q */
 /* > \verbatim */
 /* >          Q is COMPLEX array, dimension (LDQ, N) */
 /* >          If JOBZ = 'V', the n-by-n matrix used in the reduction of */
 /* >          A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x, */
 /* >          and consequently C to tridiagonal form. */
 /* >          If JOBZ = 'N', the array Q is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDQ */
 /* > \verbatim */
 /* >          LDQ is INTEGER */
 /* >          The leading dimension of the array Q.  If JOBZ = 'N', */
 /* >          LDQ >= 1. If JOBZ = 'V', LDQ >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] VL */
 /* > \verbatim */
 /* >          VL is REAL */
 /* > */
 /* >          If RANGE='V', the lower bound of the interval to */
 /* >          be searched for eigenvalues. VL < VU. */
 /* >          Not referenced if RANGE = 'A' or 'I'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] VU */
 /* > \verbatim */
 /* >          VU is REAL */
 /* > */
 /* >          If RANGE='V', the upper bound of the interval to */
 /* >          be searched for eigenvalues. VL < VU. */
 /* >          Not referenced if RANGE = 'A' or 'I'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IL */
 /* > \verbatim */
 /* >          IL is INTEGER */
 /* > */
 /* >          If RANGE='I', the index of the */
 /* >          smallest eigenvalue to be returned. */
 /* >          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */
 /* >          Not referenced if RANGE = 'A' or 'V'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IU */
 /* > \verbatim */
 /* >          IU is INTEGER */
 /* > */
 /* >          If RANGE='I', the index of the */
 /* >          largest eigenvalue to be returned. */
 /* >          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */
 /* >          Not referenced if RANGE = 'A' or 'V'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ABSTOL */
 /* > \verbatim */
 /* >          ABSTOL is REAL */
 /* >          The absolute error tolerance for the eigenvalues. */
 /* >          An approximate eigenvalue is accepted as converged */
 /* >          when it is determined to lie in an interval [a,b] */
 /* >          of width less than or equal to */
 /* > */
 /* >                  ABSTOL + EPS *   f2cmax( |a|,|b| ) , */
 /* > */
 /* >          where EPS is the machine precision.  If ABSTOL is less than */
 /* >          or equal to zero, then  EPS*|T|  will be used in its place, */
 /* >          where |T| is the 1-norm of the tridiagonal matrix obtained */
 /* >          by reducing AP to tridiagonal form. */
 /* > */
 /* >          Eigenvalues will be computed most accurately when ABSTOL is */
 /* >          set to twice the underflow threshold 2*SLAMCH('S'), not zero. */
 /* >          If this routine returns with INFO>0, indicating that some */
 /* >          eigenvectors did not converge, try setting ABSTOL to */
 /* >          2*SLAMCH('S'). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The total number of eigenvalues found.  0 <= M <= N. */
 /* >          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is REAL array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX array, dimension (LDZ, N) */
 /* >          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */
 /* >          eigenvectors, with the i-th column of Z holding the */
 /* >          eigenvector associated with W(i). The eigenvectors are */
 /* >          normalized so that Z**H*B*Z = I. */
 /* >          If JOBZ = 'N', then Z is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z.  LDZ >= 1, and if */
 /* >          JOBZ = 'V', LDZ >= N. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, dimension (7*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (5*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IFAIL */
 /* > \verbatim */
 /* >          IFAIL is INTEGER array, dimension (N) */
 /* >          If JOBZ = 'V', then if INFO = 0, the first M elements of */
 /* >          IFAIL are zero.  If INFO > 0, then IFAIL contains the */
 /* >          indices of the eigenvectors that failed to converge. */
 /* >          If JOBZ = 'N', then IFAIL is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, and i is: */
 /* >             <= N:  then i eigenvectors failed to converge.  Their */
 /* >                    indices are stored in array IFAIL. */
 /* >             > N:   if INFO = N + i, for 1 <= i <= N, then CPBSTF */
 /* >                    returned INFO = i: B is not positive definite. */
 /* >                    The factorization of B could not be completed and */
 /* >                    no eigenvalues or eigenvectors were computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2016 */

 /* > \ingroup complexOTHEReigen */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */

 /*  ===================================================================== */
 /* Subroutine */ int chbgvx_(char *jobz, char *range, char *uplo, integer *n, 
 	integer *ka, integer *kb, complex *ab, integer *ldab, complex *bb, 
 	integer *ldbb, complex *q, integer *ldq, real *vl, real *vu, integer *
 	il, integer *iu, real *abstol, integer *m, real *w, complex *z__, 
 	integer *ldz, complex *work, real *rwork, integer *iwork, integer *
 	ifail, integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, bb_dim1, bb_offset, q_dim1, q_offset, z_dim1, 
 	    z_offset, i__1, i__2;

    /* Local variables */
    integer indd, inde;
    char vect[1];
    logical test;
    integer itmp1, i__, j, indee;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
 	    , complex *, integer *, complex *, integer *, complex *, complex *
 	    , integer *);
    integer iinfo;
    char order[1];
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
 	    complex *, integer *), cswap_(integer *, complex *, integer *, 
 	    complex *, integer *);
    logical upper;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
 	    integer *);
    logical wantz;
    integer jj;
    logical alleig, indeig;
    integer indibl;
    extern /* Subroutine */ int chbtrd_(char *, char *, integer *, integer *, 
 	    complex *, integer *, real *, real *, complex *, integer *, 
 	    complex *, integer *);
    logical valeig;
    extern /* Subroutine */ int chbgst_(char *, char *, integer *, integer *, 
 	    integer *, complex *, integer *, complex *, integer *, complex *, 
 	    integer *, complex *, real *, integer *), clacpy_(
 	    char *, integer *, integer *, complex *, integer *, complex *, 
 	    integer *), xerbla_(char *, integer *, ftnlen), cpbstf_(
 	    char *, integer *, integer *, complex *, integer *, integer *);
    integer indiwk, indisp;
    extern /* Subroutine */ int cstein_(integer *, real *, real *, integer *, 
 	    real *, integer *, integer *, complex *, integer *, real *, 
 	    integer *, integer *, integer *);
    integer indrwk, indwrk;
    extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, 
 	    complex *, integer *, real *, integer *), ssterf_(integer 
 	    *, real *, real *, integer *);
    integer nsplit;
    extern /* Subroutine */ int sstebz_(char *, char *, integer *, real *, 
 	    real *, integer *, integer *, real *, real *, real *, integer *, 
 	    integer *, real *, integer *, integer *, real *, integer *, 
 	    integer *);
    real tmp1;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    bb_dim1 = *ldbb;
    bb_offset = 1 + bb_dim1 * 1;
    bb -= bb_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;
    --iwork;
    --ifail;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    alleig = lsame_(range, "A");
    valeig = lsame_(range, "V");
    indeig = lsame_(range, "I");

    *info = 0;
    if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -1;
    } else if (! (alleig || valeig || indeig)) {
 	*info = -2;
    } else if (! (upper || lsame_(uplo, "L"))) {
 	*info = -3;
    } else if (*n < 0) {
 	*info = -4;
    } else if (*ka < 0) {
 	*info = -5;
    } else if (*kb < 0 || *kb > *ka) {
 	*info = -6;
    } else if (*ldab < *ka + 1) {
 	*info = -8;
    } else if (*ldbb < *kb + 1) {
 	*info = -10;
    } else if (*ldq < 1 || wantz && *ldq < *n) {
 	*info = -12;
    } else {
 	if (valeig) {
 	    if (*n > 0 && *vu <= *vl) {
 		*info = -14;
 	    }
 	} else if (indeig) {
 	    if (*il < 1 || *il > f2cmax(1,*n)) {
 		*info = -15;
 	    } else if (*iu < f2cmin(*n,*il) || *iu > *n) {
 		*info = -16;
 	    }
 	}
    }
    if (*info == 0) {
 	if (*ldz < 1 || wantz && *ldz < *n) {
 	    *info = -21;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHBGVX", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    *m = 0;
    if (*n == 0) {
 	return 0;
    }

 /*     Form a split Cholesky factorization of B. */

    cpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info);
    if (*info != 0) {
 	*info = *n + *info;
 	return 0;
    }

 /*     Transform problem to standard eigenvalue problem. */

    chbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb,
 	     &q[q_offset], ldq, &work[1], &rwork[1], &iinfo);

 /*     Solve the standard eigenvalue problem. */
 /*     Reduce Hermitian band matrix to tridiagonal form. */

    indd = 1;
    inde = indd + *n;
    indrwk = inde + *n;
    indwrk = 1;
    if (wantz) {
 	*(unsigned char *)vect = 'U';
    } else {
 	*(unsigned char *)vect = 'N';
    }
    chbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &rwork[indd], &rwork[
 	    inde], &q[q_offset], ldq, &work[indwrk], &iinfo);

 /*     If all eigenvalues are desired and ABSTOL is less than or equal */
 /*     to zero, then call SSTERF or CSTEQR.  If this fails for some */
 /*     eigenvalue, then try SSTEBZ. */

    test = FALSE_;
    if (indeig) {
 	if (*il == 1 && *iu == *n) {
 	    test = TRUE_;
 	}
    }
    if ((alleig || test) && *abstol <= 0.f) {
 	scopy_(n, &rwork[indd], &c__1, &w[1], &c__1);
 	indee = indrwk + (*n << 1);
 	i__1 = *n - 1;
 	scopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1);
 	if (! wantz) {
 	    ssterf_(n, &w[1], &rwork[indee], info);
 	} else {
 	    clacpy_("A", n, n, &q[q_offset], ldq, &z__[z_offset], ldz);
 	    csteqr_(jobz, n, &w[1], &rwork[indee], &z__[z_offset], ldz, &
 		    rwork[indrwk], info);
 	    if (*info == 0) {
 		i__1 = *n;
 		for (i__ = 1; i__ <= i__1; ++i__) {
 		    ifail[i__] = 0;
 /* L10: */
 		}
 	    }
 	}
 	if (*info == 0) {
 	    *m = *n;
 	    goto L30;
 	}
 	*info = 0;
    }

 /*     Otherwise, call SSTEBZ and, if eigenvectors are desired, */
 /*     call CSTEIN. */

    if (wantz) {
 	*(unsigned char *)order = 'B';
    } else {
 	*(unsigned char *)order = 'E';
    }
    indibl = 1;
    indisp = indibl + *n;
    indiwk = indisp + *n;
    sstebz_(range, order, n, vl, vu, il, iu, abstol, &rwork[indd], &rwork[
 	    inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[
 	    indrwk], &iwork[indiwk], info);

    if (wantz) {
 	cstein_(n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &
 		iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[
 		indiwk], &ifail[1], info);

 /*        Apply unitary matrix used in reduction to tridiagonal */
 /*        form to eigenvectors returned by CSTEIN. */

 	i__1 = *m;
 	for (j = 1; j <= i__1; ++j) {
 	    ccopy_(n, &z__[j * z_dim1 + 1], &c__1, &work[1], &c__1);
 	    cgemv_("N", n, n, &c_b2, &q[q_offset], ldq, &work[1], &c__1, &
 		    c_b1, &z__[j * z_dim1 + 1], &c__1);
 /* L20: */
 	}
    }

 L30:

 /*     If eigenvalues are not in order, then sort them, along with */
 /*     eigenvectors. */

    if (wantz) {
 	i__1 = *m - 1;
 	for (j = 1; j <= i__1; ++j) {
 	    i__ = 0;
 	    tmp1 = w[j];
 	    i__2 = *m;
 	    for (jj = j + 1; jj <= i__2; ++jj) {
 		if (w[jj] < tmp1) {
 		    i__ = jj;
 		    tmp1 = w[jj];
 		}
 /* L40: */
 	    }

 	    if (i__ != 0) {
 		itmp1 = iwork[indibl + i__ - 1];
 		w[i__] = w[j];
 		iwork[indibl + i__ - 1] = iwork[indibl + j - 1];
 		w[j] = tmp1;
 		iwork[indibl + j - 1] = itmp1;
 		cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1],
 			 &c__1);
 		if (*info != 0) {
 		    itmp1 = ifail[i__];
 		    ifail[i__] = ifail[j];
 		    ifail[j] = itmp1;
 		}
 	    }
 /* L50: */
 	}
    }

    return 0;

 /*     End of CHBGVX */

 } /* chbgvx_ */

--- a/lapack-netlib/SRC/chbtrd.c
+++ b/lapack-netlib/SRC/chbtrd.c
--- a/lapack-netlib/SRC/checon.c
+++ b/lapack-netlib/SRC/checon.c
@@ -0,0 +1,633 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b CHECON */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHECON + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/checon.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/checon.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/checon.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHECON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, */
 /*                          INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, N */
 /*       REAL               ANORM, RCOND */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX            A( LDA, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHECON estimates the reciprocal of the condition number of a complex */
 /* > Hermitian matrix A using the factorization A = U*D*U**H or */
 /* > A = L*D*L**H computed by CHETRF. */
 /* > */
 /* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */
 /* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          Specifies whether the details of the factorization are stored */
 /* >          as an upper or lower triangular matrix. */
 /* >          = 'U':  Upper triangular, form is A = U*D*U**H; */
 /* >          = 'L':  Lower triangular, form is A = L*D*L**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          The block diagonal matrix D and the multipliers used to */
 /* >          obtain the factor U or L as computed by CHETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          Details of the interchanges and the block structure of D */
 /* >          as determined by CHETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ANORM */
 /* > \verbatim */
 /* >          ANORM is REAL */
 /* >          The 1-norm of the original matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is REAL */
 /* >          The reciprocal of the condition number of the matrix A, */
 /* >          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */
 /* >          estimate of the 1-norm of inv(A) computed in this routine. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexHEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int checon_(char *uplo, integer *n, complex *a, integer *lda,
 	 integer *ipiv, real *anorm, real *rcond, complex *work, integer *
 	info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;

    /* Local variables */
    integer kase, i__;
    extern logical lsame_(char *, char *);
    integer isave[3];
    logical upper;
    extern /* Subroutine */ int clacn2_(integer *, complex *, complex *, real 
 	    *, integer *, integer *), xerbla_(char *, integer *, ftnlen);
    real ainvnm;
    extern /* Subroutine */ int chetrs_(char *, integer *, integer *, complex 
 	    *, integer *, integer *, complex *, integer *, integer *);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    --work;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    } else if (*anorm < 0.f) {
 	*info = -6;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHECON", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    *rcond = 0.f;
    if (*n == 0) {
 	*rcond = 1.f;
 	return 0;
    } else if (*anorm <= 0.f) {
 	return 0;
    }

 /*     Check that the diagonal matrix D is nonsingular. */

    if (upper) {

 /*        Upper triangular storage: examine D from bottom to top */

 	for (i__ = *n; i__ >= 1; --i__) {
 	    i__1 = i__ + i__ * a_dim1;
 	    if (ipiv[i__] > 0 && (a[i__1].r == 0.f && a[i__1].i == 0.f)) {
 		return 0;
 	    }
 /* L10: */
 	}
    } else {

 /*        Lower triangular storage: examine D from top to bottom. */

 	i__1 = *n;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = i__ + i__ * a_dim1;
 	    if (ipiv[i__] > 0 && (a[i__2].r == 0.f && a[i__2].i == 0.f)) {
 		return 0;
 	    }
 /* L20: */
 	}
    }

 /*     Estimate the 1-norm of the inverse. */

    kase = 0;
 L30:
    clacn2_(n, &work[*n + 1], &work[1], &ainvnm, &kase, isave);
    if (kase != 0) {

 /*        Multiply by inv(L*D*L**H) or inv(U*D*U**H). */

 	chetrs_(uplo, n, &c__1, &a[a_offset], lda, &ipiv[1], &work[1], n, 
 		info);
 	goto L30;
    }

 /*     Compute the estimate of the reciprocal condition number. */

    if (ainvnm != 0.f) {
 	*rcond = 1.f / ainvnm / *anorm;
    }

    return 0;

 /*     End of CHECON */

 } /* checon_ */

--- a/lapack-netlib/SRC/checon_3.c
+++ b/lapack-netlib/SRC/checon_3.c
@@ -0,0 +1,674 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b CHECON_3 */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHECON_3 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/checon_
 3.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/checon_
 3.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/checon_
 3.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHECON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, */
 /*                            WORK, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, N */
 /*       REAL               ANORM, RCOND */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX            A( LDA, * ), E ( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > CHECON_3 estimates the reciprocal of the condition number (in the */
 /* > 1-norm) of a complex Hermitian matrix A using the factorization */
 /* > computed by CHETRF_RK or CHETRF_BK: */
 /* > */
 /* >    A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), */
 /* > */
 /* > where U (or L) is unit upper (or lower) triangular matrix, */
 /* > U**H (or L**H) is the conjugate of U (or L), P is a permutation */
 /* > matrix, P**T is the transpose of P, and D is Hermitian and block */
 /* > diagonal with 1-by-1 and 2-by-2 diagonal blocks. */
 /* > */
 /* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */
 /* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */
 /* > This routine uses BLAS3 solver CHETRS_3. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          Specifies whether the details of the factorization are */
 /* >          stored as an upper or lower triangular matrix: */
 /* >          = 'U':  Upper triangular, form is A = P*U*D*(U**H)*(P**T); */
 /* >          = 'L':  Lower triangular, form is A = P*L*D*(L**H)*(P**T). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          Diagonal of the block diagonal matrix D and factors U or L */
 /* >          as computed by CHETRF_RK and CHETRF_BK: */
 /* >            a) ONLY diagonal elements of the Hermitian block diagonal */
 /* >               matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); */
 /* >               (superdiagonal (or subdiagonal) elements of D */
 /* >                should be provided on entry in array E), and */
 /* >            b) If UPLO = 'U': factor U in the superdiagonal part of A. */
 /* >               If UPLO = 'L': factor L in the subdiagonal part of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] E */
 /* > \verbatim */
 /* >          E is COMPLEX array, dimension (N) */
 /* >          On entry, contains the superdiagonal (or subdiagonal) */
 /* >          elements of the Hermitian block diagonal matrix D */
 /* >          with 1-by-1 or 2-by-2 diagonal blocks, where */
 /* >          If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; */
 /* >          If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. */
 /* > */
 /* >          NOTE: For 1-by-1 diagonal block D(k), where */
 /* >          1 <= k <= N, the element E(k) is not referenced in both */
 /* >          UPLO = 'U' or UPLO = 'L' cases. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          Details of the interchanges and the block structure of D */
 /* >          as determined by CHETRF_RK or CHETRF_BK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ANORM */
 /* > \verbatim */
 /* >          ANORM is REAL */
 /* >          The 1-norm of the original matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is REAL */
 /* >          The reciprocal of the condition number of the matrix A, */
 /* >          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */
 /* >          estimate of the 1-norm of inv(A) computed in this routine. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2017 */

 /* > \ingroup complexHEcomputational */

 /* > \par Contributors: */
 /*  ================== */
 /* > \verbatim */
 /* > */
 /* >  June 2017,  Igor Kozachenko, */
 /* >                  Computer Science Division, */
 /* >                  University of California, Berkeley */
 /* > */
 /* >  September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */
 /* >                  School of Mathematics, */
 /* >                  University of Manchester */
 /* > */
 /* > \endverbatim */

 /*  ===================================================================== */
 /* Subroutine */ int checon_3_(char *uplo, integer *n, complex *a, integer *
 	lda, complex *e, integer *ipiv, real *anorm, real *rcond, complex *
 	work, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;

    /* Local variables */
    integer kase, i__;
    extern logical lsame_(char *, char *);
    integer isave[3];
    logical upper;
    extern /* Subroutine */ int clacn2_(integer *, complex *, complex *, real 
 	    *, integer *, integer *), xerbla_(char *, integer *, ftnlen);
    real ainvnm;
    extern /* Subroutine */ int chetrs_3_(char *, integer *, integer *, 
 	    complex *, integer *, complex *, integer *, complex *, integer *, 
 	    integer *);


 /*  -- LAPACK computational routine (version 3.7.1) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --e;
    --ipiv;
    --work;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    } else if (*anorm < 0.f) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHECON_3", &i__1, (ftnlen)8);
 	return 0;
    }

 /*     Quick return if possible */

    *rcond = 0.f;
    if (*n == 0) {
 	*rcond = 1.f;
 	return 0;
    } else if (*anorm <= 0.f) {
 	return 0;
    }

 /*     Check that the diagonal matrix D is nonsingular. */

    if (upper) {

 /*        Upper triangular storage: examine D from bottom to top */

 	for (i__ = *n; i__ >= 1; --i__) {
 	    i__1 = i__ + i__ * a_dim1;
 	    if (ipiv[i__] > 0 && (a[i__1].r == 0.f && a[i__1].i == 0.f)) {
 		return 0;
 	    }
 	}
    } else {

 /*        Lower triangular storage: examine D from top to bottom. */

 	i__1 = *n;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = i__ + i__ * a_dim1;
 	    if (ipiv[i__] > 0 && (a[i__2].r == 0.f && a[i__2].i == 0.f)) {
 		return 0;
 	    }
 	}
    }

 /*     Estimate the 1-norm of the inverse. */

    kase = 0;
 L30:
    clacn2_(n, &work[*n + 1], &work[1], &ainvnm, &kase, isave);
    if (kase != 0) {

 /*        Multiply by inv(L*D*L**H) or inv(U*D*U**H). */

 	chetrs_3_(uplo, n, &c__1, &a[a_offset], lda, &e[1], &ipiv[1], &work[
 		1], n, info);
 	goto L30;
    }

 /*     Compute the estimate of the reciprocal condition number. */

    if (ainvnm != 0.f) {
 	*rcond = 1.f / ainvnm / *anorm;
    }

    return 0;

 /*     End of CHECON_3 */

 } /* checon_3__ */

--- a/lapack-netlib/SRC/checon_rook.c
+++ b/lapack-netlib/SRC/checon_rook.c
@@ -0,0 +1,648 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief <b> CHECON_ROOK estimates the reciprocal of the condition number fort HE matrices using factorizat
 ion obtained with one of the bounded diagonal pivoting methods (f2cmax 2 interchanges) </b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHECON_ROOK + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/checon_
 rook.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/checon_
 rook.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/checon_
 rook.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHECON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, */
 /*                               INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, N */
 /*       REAL               ANORM, RCOND */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX            A( LDA, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHECON_ROOK estimates the reciprocal of the condition number of a complex */
 /* > Hermitian matrix A using the factorization A = U*D*U**H or */
 /* > A = L*D*L**H computed by CHETRF_ROOK. */
 /* > */
 /* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */
 /* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          Specifies whether the details of the factorization are stored */
 /* >          as an upper or lower triangular matrix. */
 /* >          = 'U':  Upper triangular, form is A = U*D*U**H; */
 /* >          = 'L':  Lower triangular, form is A = L*D*L**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          The block diagonal matrix D and the multipliers used to */
 /* >          obtain the factor U or L as computed by CHETRF_ROOK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          Details of the interchanges and the block structure of D */
 /* >          as determined by CHETRF_ROOK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ANORM */
 /* > \verbatim */
 /* >          ANORM is REAL */
 /* >          The 1-norm of the original matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is REAL */
 /* >          The reciprocal of the condition number of the matrix A, */
 /* >          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */
 /* >          estimate of the 1-norm of inv(A) computed in this routine. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexHEcomputational */

 /* > \par Contributors: */
 /*  ================== */
 /* > \verbatim */
 /* > */
 /* >  December 2016,  Igor Kozachenko, */
 /* >                  Computer Science Division, */
 /* >                  University of California, Berkeley */
 /* > */
 /* >  September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */
 /* >                  School of Mathematics, */
 /* >                  University of Manchester */
 /* > */
 /* > \endverbatim */

 /*  ===================================================================== */
 /* Subroutine */ int checon_rook_(char *uplo, integer *n, complex *a, 
 	integer *lda, integer *ipiv, real *anorm, real *rcond, complex *work, 
 	integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;

    /* Local variables */
    integer kase, i__;
    extern logical lsame_(char *, char *);
    integer isave[3];
    logical upper;
    extern /* Subroutine */ int clacn2_(integer *, complex *, complex *, real 
 	    *, integer *, integer *), xerbla_(char *, integer *, ftnlen);
    real ainvnm;
    extern /* Subroutine */ int chetrs_rook_(char *, integer *, integer *, 
 	    complex *, integer *, integer *, complex *, integer *, integer *);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    --work;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    } else if (*anorm < 0.f) {
 	*info = -6;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHECON_ROOK", &i__1, (ftnlen)11);
 	return 0;
    }

 /*     Quick return if possible */

    *rcond = 0.f;
    if (*n == 0) {
 	*rcond = 1.f;
 	return 0;
    } else if (*anorm <= 0.f) {
 	return 0;
    }

 /*     Check that the diagonal matrix D is nonsingular. */

    if (upper) {

 /*        Upper triangular storage: examine D from bottom to top */

 	for (i__ = *n; i__ >= 1; --i__) {
 	    i__1 = i__ + i__ * a_dim1;
 	    if (ipiv[i__] > 0 && (a[i__1].r == 0.f && a[i__1].i == 0.f)) {
 		return 0;
 	    }
 /* L10: */
 	}
    } else {

 /*        Lower triangular storage: examine D from top to bottom. */

 	i__1 = *n;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = i__ + i__ * a_dim1;
 	    if (ipiv[i__] > 0 && (a[i__2].r == 0.f && a[i__2].i == 0.f)) {
 		return 0;
 	    }
 /* L20: */
 	}
    }

 /*     Estimate the 1-norm of the inverse. */

    kase = 0;
 L30:
    clacn2_(n, &work[*n + 1], &work[1], &ainvnm, &kase, isave);
    if (kase != 0) {

 /*        Multiply by inv(L*D*L**H) or inv(U*D*U**H). */

 	chetrs_rook_(uplo, n, &c__1, &a[a_offset], lda, &ipiv[1], &work[1], 
 		n, info);
 	goto L30;
    }

 /*     Compute the estimate of the reciprocal condition number. */

    if (ainvnm != 0.f) {
 	*rcond = 1.f / ainvnm / *anorm;
    }

    return 0;

 /*     End of CHECON_ROOK */

 } /* checon_rook__ */

--- a/lapack-netlib/SRC/cheequb.c
+++ b/lapack-netlib/SRC/cheequb.c
@@ -0,0 +1,873 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b CHEEQUB */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHEEQUB + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cheequb
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cheequb
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cheequb
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHEEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) */

 /*       INTEGER            INFO, LDA, N */
 /*       REAL               AMAX, SCOND */
 /*       CHARACTER          UPLO */
 /*       COMPLEX            A( LDA, * ), WORK( * ) */
 /*       REAL               S( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHEEQUB computes row and column scalings intended to equilibrate a */
 /* > Hermitian matrix A (with respect to the Euclidean norm) and reduce */
 /* > its condition number. The scale factors S are computed by the BIN */
 /* > algorithm (see references) so that the scaled matrix B with elements */
 /* > B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of */
 /* > the smallest possible condition number over all possible diagonal */
 /* > scalings. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          The N-by-N Hermitian matrix whose scaling factors are to be */
 /* >          computed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] S */
 /* > \verbatim */
 /* >          S is REAL array, dimension (N) */
 /* >          If INFO = 0, S contains the scale factors for A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] SCOND */
 /* > \verbatim */
 /* >          SCOND is REAL */
 /* >          If INFO = 0, S contains the ratio of the smallest S(i) to */
 /* >          the largest S(i). If SCOND >= 0.1 and AMAX is neither too */
 /* >          large nor too small, it is not worth scaling by S. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] AMAX */
 /* > \verbatim */
 /* >          AMAX is REAL */
 /* >          Largest absolute value of any matrix element. If AMAX is */
 /* >          very close to overflow or very close to underflow, the */
 /* >          matrix should be scaled. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, the i-th diagonal element is nonpositive. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date April 2012 */

 /* > \ingroup complexHEcomputational */

 /* > \par References: */
 /*  ================ */
 /* > */
 /* >  Livne, O.E. and Golub, G.H., "Scaling by Binormalization", \n */
 /* >  Numerical Algorithms, vol. 35, no. 1, pp. 97-120, January 2004. \n */
 /* >  DOI 10.1023/B:NUMA.0000016606.32820.69 \n */
 /* >  Tech report version: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.3.1679 */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int cheequb_(char *uplo, integer *n, complex *a, integer *
 	lda, real *s, real *scond, real *amax, complex *work, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    real r__1, r__2, r__3, r__4;
    doublereal d__1;
    complex q__1, q__2, q__3, q__4;

    /* Local variables */
    real base;
    integer iter;
    real smin, smax, d__;
    integer i__, j;
    real t, u, scale;
    extern logical lsame_(char *, char *);
    real c0, c1, c2, sumsq, si;
    logical up;
    extern real slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    real bignum;
    extern /* Subroutine */ int classq_(integer *, complex *, integer *, real 
 	    *, real *);
    real smlnum, avg, std, tol;


 /*  -- LAPACK computational routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     April 2012 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --s;
    --work;

    /* Function Body */
    *info = 0;
    if (! (lsame_(uplo, "U") || lsame_(uplo, "L"))) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHEEQUB", &i__1, (ftnlen)7);
 	return 0;
    }
    up = lsame_(uplo, "U");
    *amax = 0.f;

 /*     Quick return if possible. */

    if (*n == 0) {
 	*scond = 1.f;
 	return 0;
    }
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	s[i__] = 0.f;
    }
    *amax = 0.f;
    if (up) {
 	i__1 = *n;
 	for (j = 1; j <= i__1; ++j) {
 	    i__2 = j - 1;
 	    for (i__ = 1; i__ <= i__2; ++i__) {
 /* Computing MAX */
 		i__3 = i__ + j * a_dim1;
 		r__3 = s[i__], r__4 = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = 
 			r_imag(&a[i__ + j * a_dim1]), abs(r__2));
 		s[i__] = f2cmax(r__3,r__4);
 /* Computing MAX */
 		i__3 = i__ + j * a_dim1;
 		r__3 = s[j], r__4 = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = 
 			r_imag(&a[i__ + j * a_dim1]), abs(r__2));
 		s[j] = f2cmax(r__3,r__4);
 /* Computing MAX */
 		i__3 = i__ + j * a_dim1;
 		r__3 = *amax, r__4 = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = 
 			r_imag(&a[i__ + j * a_dim1]), abs(r__2));
 		*amax = f2cmax(r__3,r__4);
 	    }
 /* Computing MAX */
 	    i__2 = j + j * a_dim1;
 	    r__3 = s[j], r__4 = (r__1 = a[i__2].r, abs(r__1)) + (r__2 = 
 		    r_imag(&a[j + j * a_dim1]), abs(r__2));
 	    s[j] = f2cmax(r__3,r__4);
 /* Computing MAX */
 	    i__2 = j + j * a_dim1;
 	    r__3 = *amax, r__4 = (r__1 = a[i__2].r, abs(r__1)) + (r__2 = 
 		    r_imag(&a[j + j * a_dim1]), abs(r__2));
 	    *amax = f2cmax(r__3,r__4);
 	}
    } else {
 	i__1 = *n;
 	for (j = 1; j <= i__1; ++j) {
 /* Computing MAX */
 	    i__2 = j + j * a_dim1;
 	    r__3 = s[j], r__4 = (r__1 = a[i__2].r, abs(r__1)) + (r__2 = 
 		    r_imag(&a[j + j * a_dim1]), abs(r__2));
 	    s[j] = f2cmax(r__3,r__4);
 /* Computing MAX */
 	    i__2 = j + j * a_dim1;
 	    r__3 = *amax, r__4 = (r__1 = a[i__2].r, abs(r__1)) + (r__2 = 
 		    r_imag(&a[j + j * a_dim1]), abs(r__2));
 	    *amax = f2cmax(r__3,r__4);
 	    i__2 = *n;
 	    for (i__ = j + 1; i__ <= i__2; ++i__) {
 /* Computing MAX */
 		i__3 = i__ + j * a_dim1;
 		r__3 = s[i__], r__4 = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = 
 			r_imag(&a[i__ + j * a_dim1]), abs(r__2));
 		s[i__] = f2cmax(r__3,r__4);
 /* Computing MAX */
 		i__3 = i__ + j * a_dim1;
 		r__3 = s[j], r__4 = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = 
 			r_imag(&a[i__ + j * a_dim1]), abs(r__2));
 		s[j] = f2cmax(r__3,r__4);
 /* Computing MAX */
 		i__3 = i__ + j * a_dim1;
 		r__3 = *amax, r__4 = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = 
 			r_imag(&a[i__ + j * a_dim1]), abs(r__2));
 		*amax = f2cmax(r__3,r__4);
 	    }
 	}
    }
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 	s[j] = 1.f / s[j];
    }
    tol = 1.f / sqrt(*n * 2.f);
    for (iter = 1; iter <= 100; ++iter) {
 	scale = 0.f;
 	sumsq = 0.f;
 /*        beta = |A|s */
 	i__1 = *n;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = i__;
 	    work[i__2].r = 0.f, work[i__2].i = 0.f;
 	}
 	if (up) {
 	    i__1 = *n;
 	    for (j = 1; j <= i__1; ++j) {
 		i__2 = j - 1;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    i__3 = i__;
 		    i__4 = i__;
 		    i__5 = i__ + j * a_dim1;
 		    r__3 = ((r__1 = a[i__5].r, abs(r__1)) + (r__2 = r_imag(&a[
 			    i__ + j * a_dim1]), abs(r__2))) * s[j];
 		    q__1.r = work[i__4].r + r__3, q__1.i = work[i__4].i;
 		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
 		    i__3 = j;
 		    i__4 = j;
 		    i__5 = i__ + j * a_dim1;
 		    r__3 = ((r__1 = a[i__5].r, abs(r__1)) + (r__2 = r_imag(&a[
 			    i__ + j * a_dim1]), abs(r__2))) * s[i__];
 		    q__1.r = work[i__4].r + r__3, q__1.i = work[i__4].i;
 		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
 		}
 		i__2 = j;
 		i__3 = j;
 		i__4 = j + j * a_dim1;
 		r__3 = ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[j + 
 			j * a_dim1]), abs(r__2))) * s[j];
 		q__1.r = work[i__3].r + r__3, q__1.i = work[i__3].i;
 		work[i__2].r = q__1.r, work[i__2].i = q__1.i;
 	    }
 	} else {
 	    i__1 = *n;
 	    for (j = 1; j <= i__1; ++j) {
 		i__2 = j;
 		i__3 = j;
 		i__4 = j + j * a_dim1;
 		r__3 = ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[j + 
 			j * a_dim1]), abs(r__2))) * s[j];
 		q__1.r = work[i__3].r + r__3, q__1.i = work[i__3].i;
 		work[i__2].r = q__1.r, work[i__2].i = q__1.i;
 		i__2 = *n;
 		for (i__ = j + 1; i__ <= i__2; ++i__) {
 		    i__3 = i__;
 		    i__4 = i__;
 		    i__5 = i__ + j * a_dim1;
 		    r__3 = ((r__1 = a[i__5].r, abs(r__1)) + (r__2 = r_imag(&a[
 			    i__ + j * a_dim1]), abs(r__2))) * s[j];
 		    q__1.r = work[i__4].r + r__3, q__1.i = work[i__4].i;
 		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
 		    i__3 = j;
 		    i__4 = j;
 		    i__5 = i__ + j * a_dim1;
 		    r__3 = ((r__1 = a[i__5].r, abs(r__1)) + (r__2 = r_imag(&a[
 			    i__ + j * a_dim1]), abs(r__2))) * s[i__];
 		    q__1.r = work[i__4].r + r__3, q__1.i = work[i__4].i;
 		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
 		}
 	    }
 	}
 /*        avg = s^T beta / n */
 	avg = 0.f;
 	i__1 = *n;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = i__;
 	    i__3 = i__;
 	    q__2.r = s[i__2] * work[i__3].r, q__2.i = s[i__2] * work[i__3].i;
 	    q__1.r = avg + q__2.r, q__1.i = q__2.i;
 	    avg = q__1.r;
 	}
 	avg /= *n;
 	std = 0.f;
 	i__1 = *n << 1;
 	for (i__ = *n + 1; i__ <= i__1; ++i__) {
 	    i__2 = i__;
 	    i__3 = i__ - *n;
 	    i__4 = i__ - *n;
 	    q__2.r = s[i__3] * work[i__4].r, q__2.i = s[i__3] * work[i__4].i;
 	    q__1.r = q__2.r - avg, q__1.i = q__2.i;
 	    work[i__2].r = q__1.r, work[i__2].i = q__1.i;
 	}
 	classq_(n, &work[*n + 1], &c__1, &scale, &sumsq);
 	std = scale * sqrt(sumsq / *n);
 	if (std < tol * avg) {
 	    goto L999;
 	}
 	i__1 = *n;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = i__ + i__ * a_dim1;
 	    t = (r__1 = a[i__2].r, abs(r__1)) + (r__2 = r_imag(&a[i__ + i__ * 
 		    a_dim1]), abs(r__2));
 	    si = s[i__];
 	    c2 = (*n - 1) * t;
 	    i__2 = *n - 2;
 	    i__3 = i__;
 	    r__1 = t * si;
 	    q__2.r = work[i__3].r - r__1, q__2.i = work[i__3].i;
 	    d__1 = (doublereal) i__2;
 	    q__1.r = d__1 * q__2.r, q__1.i = d__1 * q__2.i;
 	    c1 = q__1.r;
 	    r__1 = -(t * si) * si;
 	    i__2 = i__;
 	    d__1 = 2.;
 	    q__4.r = d__1 * work[i__2].r, q__4.i = d__1 * work[i__2].i;
 	    q__3.r = si * q__4.r, q__3.i = si * q__4.i;
 	    q__2.r = r__1 + q__3.r, q__2.i = q__3.i;
 	    r__2 = *n * avg;
 	    q__1.r = q__2.r - r__2, q__1.i = q__2.i;
 	    c0 = q__1.r;
 	    d__ = c1 * c1 - c0 * 4 * c2;
 	    if (d__ <= 0.f) {
 		*info = -1;
 		return 0;
 	    }
 	    si = c0 * -2 / (c1 + sqrt(d__));
 	    d__ = si - s[i__];
 	    u = 0.f;
 	    if (up) {
 		i__2 = i__;
 		for (j = 1; j <= i__2; ++j) {
 		    i__3 = j + i__ * a_dim1;
 		    t = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[j + 
 			    i__ * a_dim1]), abs(r__2));
 		    u += s[j] * t;
 		    i__3 = j;
 		    i__4 = j;
 		    r__1 = d__ * t;
 		    q__1.r = work[i__4].r + r__1, q__1.i = work[i__4].i;
 		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
 		}
 		i__2 = *n;
 		for (j = i__ + 1; j <= i__2; ++j) {
 		    i__3 = i__ + j * a_dim1;
 		    t = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[i__ 
 			    + j * a_dim1]), abs(r__2));
 		    u += s[j] * t;
 		    i__3 = j;
 		    i__4 = j;
 		    r__1 = d__ * t;
 		    q__1.r = work[i__4].r + r__1, q__1.i = work[i__4].i;
 		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
 		}
 	    } else {
 		i__2 = i__;
 		for (j = 1; j <= i__2; ++j) {
 		    i__3 = i__ + j * a_dim1;
 		    t = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[i__ 
 			    + j * a_dim1]), abs(r__2));
 		    u += s[j] * t;
 		    i__3 = j;
 		    i__4 = j;
 		    r__1 = d__ * t;
 		    q__1.r = work[i__4].r + r__1, q__1.i = work[i__4].i;
 		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
 		}
 		i__2 = *n;
 		for (j = i__ + 1; j <= i__2; ++j) {
 		    i__3 = j + i__ * a_dim1;
 		    t = (r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[j + 
 			    i__ * a_dim1]), abs(r__2));
 		    u += s[j] * t;
 		    i__3 = j;
 		    i__4 = j;
 		    r__1 = d__ * t;
 		    q__1.r = work[i__4].r + r__1, q__1.i = work[i__4].i;
 		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
 		}
 	    }
 	    i__2 = i__;
 	    q__4.r = u + work[i__2].r, q__4.i = work[i__2].i;
 	    q__3.r = d__ * q__4.r, q__3.i = d__ * q__4.i;
 	    d__1 = (doublereal) (*n);
 	    q__2.r = q__3.r / d__1, q__2.i = q__3.i / d__1;
 	    q__1.r = avg + q__2.r, q__1.i = q__2.i;
 	    avg = q__1.r;
 	    s[i__] = si;
 	}
    }
 L999:
    smlnum = slamch_("SAFEMIN");
    bignum = 1.f / smlnum;
    smin = bignum;
    smax = 0.f;
    t = 1.f / sqrt(avg);
    base = slamch_("B");
    u = 1.f / log(base);
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	i__2 = (integer) (u * log(s[i__] * t));
 	s[i__] = pow_ri(&base, &i__2);
 /* Computing MIN */
 	r__1 = smin, r__2 = s[i__];
 	smin = f2cmin(r__1,r__2);
 /* Computing MAX */
 	r__1 = smax, r__2 = s[i__];
 	smax = f2cmax(r__1,r__2);
    }
    *scond = f2cmax(smin,smlnum) / f2cmin(smax,bignum);

    return 0;
 } /* cheequb_ */

--- a/lapack-netlib/SRC/cheev.c
+++ b/lapack-netlib/SRC/cheev.c
@@ -0,0 +1,721 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__0 = 0;
 static real c_b18 = 1.f;

 /* > \brief <b> CHEEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matr
 ices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHEEV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cheev.f
 "> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cheev.f
 "> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cheev.f
 "> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, */
 /*                         INFO ) */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, LDA, LWORK, N */
 /*       REAL               RWORK( * ), W( * ) */
 /*       COMPLEX            A( LDA, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHEEV computes all eigenvalues and, optionally, eigenvectors of a */
 /* > complex Hermitian matrix A. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA, N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of A contains the */
 /* >          upper triangular part of the matrix A.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of A contains */
 /* >          the lower triangular part of the matrix A. */
 /* >          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
 /* >          orthonormal eigenvectors of the matrix A. */
 /* >          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */
 /* >          or the upper triangle (if UPLO='U') of A, including the */
 /* >          diagonal, is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is REAL array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK.  LWORK >= f2cmax(1,2*N-1). */
 /* >          For optimal efficiency, LWORK >= (NB+1)*N, */
 /* >          where NB is the blocksize for CHETRD returned by ILAENV. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, dimension (f2cmax(1, 3*N-2)) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, the algorithm failed to converge; i */
 /* >                off-diagonal elements of an intermediate tridiagonal */
 /* >                form did not converge to zero. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexHEeigen */

 /*  ===================================================================== */
 /* Subroutine */ int cheev_(char *jobz, char *uplo, integer *n, complex *a, 
 	integer *lda, real *w, complex *work, integer *lwork, real *rwork, 
 	integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    real r__1;

    /* Local variables */
    integer inde;
    real anrm;
    integer imax;
    real rmin, rmax, sigma;
    extern logical lsame_(char *, char *);
    integer iinfo;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    logical lower, wantz;
    integer nb;
    extern real clanhe_(char *, char *, integer *, complex *, integer *, real 
 	    *);
    integer iscale;
    extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, 
 	    real *, integer *, integer *, complex *, integer *, integer *);
    extern real slamch_(char *);
    extern /* Subroutine */ int chetrd_(char *, integer *, complex *, integer 
 	    *, real *, real *, complex *, complex *, integer *, integer *);
    real safmin;
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    real bignum;
    integer indtau, indwrk;
    extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, 
 	    complex *, integer *, real *, integer *), cungtr_(char *, 
 	    integer *, complex *, integer *, complex *, complex *, integer *, 
 	    integer *), ssterf_(integer *, real *, real *, integer *);
    integer llwork;
    real smlnum;
    integer lwkopt;
    logical lquery;
    real eps;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --w;
    --work;
    --rwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    lower = lsame_(uplo, "L");
    lquery = *lwork == -1;

    *info = 0;
    if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -1;
    } else if (! (lower || lsame_(uplo, "U"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    }

    if (*info == 0) {
 	nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
 		 (ftnlen)1);
 /* Computing MAX */
 	i__1 = 1, i__2 = (nb + 1) * *n;
 	lwkopt = f2cmax(i__1,i__2);
 	work[1].r = (real) lwkopt, work[1].i = 0.f;

 /* Computing MAX */
 	i__1 = 1, i__2 = (*n << 1) - 1;
 	if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
 	    *info = -8;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHEEV ", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

    if (*n == 1) {
 	i__1 = a_dim1 + 1;
 	w[1] = a[i__1].r;
 	work[1].r = 1.f, work[1].i = 0.f;
 	if (wantz) {
 	    i__1 = a_dim1 + 1;
 	    a[i__1].r = 1.f, a[i__1].i = 0.f;
 	}
 	return 0;
    }

 /*     Get machine constants. */

    safmin = slamch_("Safe minimum");
    eps = slamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1.f / smlnum;
    rmin = sqrt(smlnum);
    rmax = sqrt(bignum);

 /*     Scale matrix to allowable range, if necessary. */

    anrm = clanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]);
    iscale = 0;
    if (anrm > 0.f && anrm < rmin) {
 	iscale = 1;
 	sigma = rmin / anrm;
    } else if (anrm > rmax) {
 	iscale = 1;
 	sigma = rmax / anrm;
    }
    if (iscale == 1) {
 	clascl_(uplo, &c__0, &c__0, &c_b18, &sigma, n, n, &a[a_offset], lda, 
 		info);
    }

 /*     Call CHETRD to reduce Hermitian matrix to tridiagonal form. */

    inde = 1;
    indtau = 1;
    indwrk = indtau + *n;
    llwork = *lwork - indwrk + 1;
    chetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], &
 	    work[indwrk], &llwork, &iinfo);

 /*     For eigenvalues only, call SSTERF.  For eigenvectors, first call */
 /*     CUNGTR to generate the unitary matrix, then call CSTEQR. */

    if (! wantz) {
 	ssterf_(n, &w[1], &rwork[inde], info);
    } else {
 	cungtr_(uplo, n, &a[a_offset], lda, &work[indtau], &work[indwrk], &
 		llwork, &iinfo);
 	indwrk = inde + *n;
 	csteqr_(jobz, n, &w[1], &rwork[inde], &a[a_offset], lda, &rwork[
 		indwrk], info);
    }

 /*     If matrix was scaled, then rescale eigenvalues appropriately. */

    if (iscale == 1) {
 	if (*info == 0) {
 	    imax = *n;
 	} else {
 	    imax = *info - 1;
 	}
 	r__1 = 1.f / sigma;
 	sscal_(&imax, &r__1, &w[1], &c__1);
    }

 /*     Set WORK(1) to optimal complex workspace size. */

    work[1].r = (real) lwkopt, work[1].i = 0.f;

    return 0;

 /*     End of CHEEV */

 } /* cheev_ */

--- a/lapack-netlib/SRC/cheev_2stage.c
+++ b/lapack-netlib/SRC/cheev_2stage.c
@@ -0,0 +1,783 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__2 = 2;
 static integer c__3 = 3;
 static integer c__4 = 4;
 static integer c__0 = 0;
 static real c_b28 = 1.f;

 /* > \brief <b> CHEEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for 
 HE matrices</b> */

 /*  @generated from zheev_2stage.f, fortran z -> c, Sat Nov  5 23:18:06 2016 */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHEEV_2STAGE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cheev_2
 stage.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cheev_2
 stage.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cheev_2
 stage.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, */
 /*                                RWORK, INFO ) */

 /*       IMPLICIT NONE */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, LDA, LWORK, N */
 /*       REAL               RWORK( * ), W( * ) */
 /*       COMPLEX            A( LDA, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHEEV_2STAGE computes all eigenvalues and, optionally, eigenvectors of a */
 /* > complex Hermitian matrix A using the 2stage technique for */
 /* > the reduction to tridiagonal. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* >                  Not available in this release. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA, N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of A contains the */
 /* >          upper triangular part of the matrix A.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of A contains */
 /* >          the lower triangular part of the matrix A. */
 /* >          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
 /* >          orthonormal eigenvectors of the matrix A. */
 /* >          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */
 /* >          or the upper triangle (if UPLO='U') of A, including the */
 /* >          diagonal, is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is REAL array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK. LWORK >= 1, when N <= 1; */
 /* >          otherwise */
 /* >          If JOBZ = 'N' and N > 1, LWORK must be queried. */
 /* >                                   LWORK = MAX(1, dimension) where */
 /* >                                   dimension = f2cmax(stage1,stage2) + (KD+1)*N + N */
 /* >                                             = N*KD + N*f2cmax(KD+1,FACTOPTNB) */
 /* >                                               + f2cmax(2*KD*KD, KD*NTHREADS) */
 /* >                                               + (KD+1)*N + N */
 /* >                                   where KD is the blocking size of the reduction, */
 /* >                                   FACTOPTNB is the blocking used by the QR or LQ */
 /* >                                   algorithm, usually FACTOPTNB=128 is a good choice */
 /* >                                   NTHREADS is the number of threads used when */
 /* >                                   openMP compilation is enabled, otherwise =1. */
 /* >          If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, dimension (f2cmax(1, 3*N-2)) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, the algorithm failed to converge; i */
 /* >                off-diagonal elements of an intermediate tridiagonal */
 /* >                form did not converge to zero. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complexHEeigen */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  All details about the 2stage techniques are available in: */
 /* > */
 /* >  Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */
 /* >  Parallel reduction to condensed forms for symmetric eigenvalue problems */
 /* >  using aggregated fine-grained and memory-aware kernels. In Proceedings */
 /* >  of 2011 International Conference for High Performance Computing, */
 /* >  Networking, Storage and Analysis (SC '11), New York, NY, USA, */
 /* >  Article 8 , 11 pages. */
 /* >  http://doi.acm.org/10.1145/2063384.2063394 */
 /* > */
 /* >  A. Haidar, J. Kurzak, P. Luszczek, 2013. */
 /* >  An improved parallel singular value algorithm and its implementation */
 /* >  for multicore hardware, In Proceedings of 2013 International Conference */
 /* >  for High Performance Computing, Networking, Storage and Analysis (SC '13). */
 /* >  Denver, Colorado, USA, 2013. */
 /* >  Article 90, 12 pages. */
 /* >  http://doi.acm.org/10.1145/2503210.2503292 */
 /* > */
 /* >  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */
 /* >  A novel hybrid CPU-GPU generalized eigensolver for electronic structure */
 /* >  calculations based on fine-grained memory aware tasks. */
 /* >  International Journal of High Performance Computing Applications. */
 /* >  Volume 28 Issue 2, Pages 196-209, May 2014. */
 /* >  http://hpc.sagepub.com/content/28/2/196 */
 /* > */
 /* > \endverbatim */

 /*  ===================================================================== */
 /* Subroutine */ int cheev_2stage_(char *jobz, char *uplo, integer *n, 
 	complex *a, integer *lda, real *w, complex *work, integer *lwork, 
 	real *rwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1;
    real r__1;

    /* Local variables */
    integer inde;
    extern integer ilaenv2stage_(integer *, char *, char *, integer *, 
 	    integer *, integer *, integer *);
    real anrm;
    integer imax;
    real rmin, rmax;
    extern /* Subroutine */ int chetrd_2stage_(char *, char *, integer *, 
 	    complex *, integer *, real *, real *, complex *, complex *, 
 	    integer *, complex *, integer *, integer *);
    real sigma;
    extern logical lsame_(char *, char *);
    integer iinfo;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    integer lhtrd, lwmin;
    logical lower;
    integer lwtrd;
    logical wantz;
    integer ib, kd;
    extern real clanhe_(char *, char *, integer *, complex *, integer *, real 
 	    *);
    integer iscale;
    extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, 
 	    real *, integer *, integer *, complex *, integer *, integer *);
    extern real slamch_(char *);
    real safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    real bignum;
    integer indtau, indwrk;
    extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, 
 	    complex *, integer *, real *, integer *), cungtr_(char *, 
 	    integer *, complex *, integer *, complex *, complex *, integer *, 
 	    integer *), ssterf_(integer *, real *, real *, integer *);
    integer llwork;
    real smlnum;
    logical lquery;
    real eps;
    integer indhous;



 /*  -- LAPACK driver routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --w;
    --work;
    --rwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    lower = lsame_(uplo, "L");
    lquery = *lwork == -1;

    *info = 0;
    if (! lsame_(jobz, "N")) {
 	*info = -1;
    } else if (! (lower || lsame_(uplo, "U"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    }

    if (*info == 0) {
 	kd = ilaenv2stage_(&c__1, "CHETRD_2STAGE", jobz, n, &c_n1, &c_n1, &
 		c_n1);
 	ib = ilaenv2stage_(&c__2, "CHETRD_2STAGE", jobz, n, &kd, &c_n1, &c_n1);
 	lhtrd = ilaenv2stage_(&c__3, "CHETRD_2STAGE", jobz, n, &kd, &ib, &
 		c_n1);
 	lwtrd = ilaenv2stage_(&c__4, "CHETRD_2STAGE", jobz, n, &kd, &ib, &
 		c_n1);
 	lwmin = *n + lhtrd + lwtrd;
 	work[1].r = (real) lwmin, work[1].i = 0.f;

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -8;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHEEV_2STAGE ", &i__1, (ftnlen)13);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

    if (*n == 1) {
 	i__1 = a_dim1 + 1;
 	w[1] = a[i__1].r;
 	work[1].r = 1.f, work[1].i = 0.f;
 	if (wantz) {
 	    i__1 = a_dim1 + 1;
 	    a[i__1].r = 1.f, a[i__1].i = 0.f;
 	}
 	return 0;
    }

 /*     Get machine constants. */

    safmin = slamch_("Safe minimum");
    eps = slamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1.f / smlnum;
    rmin = sqrt(smlnum);
    rmax = sqrt(bignum);

 /*     Scale matrix to allowable range, if necessary. */

    anrm = clanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]);
    iscale = 0;
    if (anrm > 0.f && anrm < rmin) {
 	iscale = 1;
 	sigma = rmin / anrm;
    } else if (anrm > rmax) {
 	iscale = 1;
 	sigma = rmax / anrm;
    }
    if (iscale == 1) {
 	clascl_(uplo, &c__0, &c__0, &c_b28, &sigma, n, n, &a[a_offset], lda, 
 		info);
    }

 /*     Call CHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. */

    inde = 1;
    indtau = 1;
    indhous = indtau + *n;
    indwrk = indhous + lhtrd;
    llwork = *lwork - indwrk + 1;

    chetrd_2stage_(jobz, uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &
 	    work[indtau], &work[indhous], &lhtrd, &work[indwrk], &llwork, &
 	    iinfo);

 /*     For eigenvalues only, call SSTERF.  For eigenvectors, first call */
 /*     CUNGTR to generate the unitary matrix, then call CSTEQR. */

    if (! wantz) {
 	ssterf_(n, &w[1], &rwork[inde], info);
    } else {
 	cungtr_(uplo, n, &a[a_offset], lda, &work[indtau], &work[indwrk], &
 		llwork, &iinfo);
 	indwrk = inde + *n;
 	csteqr_(jobz, n, &w[1], &rwork[inde], &a[a_offset], lda, &rwork[
 		indwrk], info);
    }

 /*     If matrix was scaled, then rescale eigenvalues appropriately. */

    if (iscale == 1) {
 	if (*info == 0) {
 	    imax = *n;
 	} else {
 	    imax = *info - 1;
 	}
 	r__1 = 1.f / sigma;
 	sscal_(&imax, &r__1, &w[1], &c__1);
    }

 /*     Set WORK(1) to optimal complex workspace size. */

    work[1].r = (real) lwmin, work[1].i = 0.f;

    return 0;

 /*     End of CHEEV_2STAGE */

 } /* cheev_2stage__ */

--- a/lapack-netlib/SRC/cheevd.c
+++ b/lapack-netlib/SRC/cheevd.c
@@ -0,0 +1,826 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__0 = 0;
 static real c_b18 = 1.f;

 /* > \brief <b> CHEEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE mat
 rices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHEEVD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cheevd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cheevd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cheevd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, */
 /*                          LRWORK, IWORK, LIWORK, INFO ) */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, LDA, LIWORK, LRWORK, LWORK, N */
 /*       INTEGER            IWORK( * ) */
 /*       REAL               RWORK( * ), W( * ) */
 /*       COMPLEX            A( LDA, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHEEVD computes all eigenvalues and, optionally, eigenvectors of a */
 /* > complex Hermitian matrix A.  If eigenvectors are desired, it uses a */
 /* > divide and conquer algorithm. */
 /* > */
 /* > The divide and conquer algorithm makes very mild assumptions about */
 /* > floating point arithmetic. It will work on machines with a guard */
 /* > digit in add/subtract, or on those binary machines without guard */
 /* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
 /* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */
 /* > without guard digits, but we know of none. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA, N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of A contains the */
 /* >          upper triangular part of the matrix A.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of A contains */
 /* >          the lower triangular part of the matrix A. */
 /* >          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
 /* >          orthonormal eigenvectors of the matrix A. */
 /* >          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */
 /* >          or the upper triangle (if UPLO='U') of A, including the */
 /* >          diagonal, is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is REAL array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK. */
 /* >          If N <= 1,                LWORK must be at least 1. */
 /* >          If JOBZ  = 'N' and N > 1, LWORK must be at least N + 1. */
 /* >          If JOBZ  = 'V' and N > 1, LWORK must be at least 2*N + N**2. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal sizes of the WORK, RWORK and */
 /* >          IWORK arrays, returns these values as the first entries of */
 /* >          the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, */
 /* >                                         dimension (LRWORK) */
 /* >          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LRWORK */
 /* > \verbatim */
 /* >          LRWORK is INTEGER */
 /* >          The dimension of the array RWORK. */
 /* >          If N <= 1,                LRWORK must be at least 1. */
 /* >          If JOBZ  = 'N' and N > 1, LRWORK must be at least N. */
 /* >          If JOBZ  = 'V' and N > 1, LRWORK must be at least */
 /* >                         1 + 5*N + 2*N**2. */
 /* > */
 /* >          If LRWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */
 /* >          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LIWORK */
 /* > \verbatim */
 /* >          LIWORK is INTEGER */
 /* >          The dimension of the array IWORK. */
 /* >          If N <= 1,                LIWORK must be at least 1. */
 /* >          If JOBZ  = 'N' and N > 1, LIWORK must be at least 1. */
 /* >          If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N. */
 /* > */
 /* >          If LIWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i and JOBZ = 'N', then the algorithm failed */
 /* >                to converge; i off-diagonal elements of an intermediate */
 /* >                tridiagonal form did not converge to zero; */
 /* >                if INFO = i and JOBZ = 'V', then the algorithm failed */
 /* >                to compute an eigenvalue while working on the submatrix */
 /* >                lying in rows and columns INFO/(N+1) through */
 /* >                mod(INFO,N+1). */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexHEeigen */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* >  Modified description of INFO. Sven, 16 Feb 05. */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* > Jeff Rutter, Computer Science Division, University of California */
 /* > at Berkeley, USA */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int cheevd_(char *jobz, char *uplo, integer *n, complex *a, 
 	integer *lda, real *w, complex *work, integer *lwork, real *rwork, 
 	integer *lrwork, integer *iwork, integer *liwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    real r__1;

    /* Local variables */
    integer inde;
    real anrm;
    integer imax;
    real rmin, rmax;
    integer lopt;
    real sigma;
    extern logical lsame_(char *, char *);
    integer iinfo;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    integer lwmin, liopt;
    logical lower;
    integer llrwk, lropt;
    logical wantz;
    integer indwk2, llwrk2;
    extern real clanhe_(char *, char *, integer *, complex *, integer *, real 
 	    *);
    integer iscale;
    extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, 
 	    real *, integer *, integer *, complex *, integer *, integer *), cstedc_(char *, integer *, real *, real *, complex *, 
 	    integer *, complex *, integer *, real *, integer *, integer *, 
 	    integer *, integer *);
    extern real slamch_(char *);
    extern /* Subroutine */ int chetrd_(char *, integer *, complex *, integer 
 	    *, real *, real *, complex *, complex *, integer *, integer *), clacpy_(char *, integer *, integer *, complex *, integer 
 	    *, complex *, integer *);
    real safmin;
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    real bignum;
    integer indtau, indrwk, indwrk, liwmin;
    extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *);
    integer lrwmin;
    extern /* Subroutine */ int cunmtr_(char *, char *, char *, integer *, 
 	    integer *, complex *, integer *, complex *, complex *, integer *, 
 	    complex *, integer *, integer *);
    integer llwork;
    real smlnum;
    logical lquery;
    real eps;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --w;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    lower = lsame_(uplo, "L");
    lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1;

    *info = 0;
    if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -1;
    } else if (! (lower || lsame_(uplo, "U"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    }

    if (*info == 0) {
 	if (*n <= 1) {
 	    lwmin = 1;
 	    lrwmin = 1;
 	    liwmin = 1;
 	    lopt = lwmin;
 	    lropt = lrwmin;
 	    liopt = liwmin;
 	} else {
 	    if (wantz) {
 		lwmin = (*n << 1) + *n * *n;
 /* Computing 2nd power */
 		i__1 = *n;
 		lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1);
 		liwmin = *n * 5 + 3;
 	    } else {
 		lwmin = *n + 1;
 		lrwmin = *n;
 		liwmin = 1;
 	    }
 /* Computing MAX */
 	    i__1 = lwmin, i__2 = *n + ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1,
 		     &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
 	    lopt = f2cmax(i__1,i__2);
 	    lropt = lrwmin;
 	    liopt = liwmin;
 	}
 	work[1].r = (real) lopt, work[1].i = 0.f;
 	rwork[1] = (real) lropt;
 	iwork[1] = liopt;

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -8;
 	} else if (*lrwork < lrwmin && ! lquery) {
 	    *info = -10;
 	} else if (*liwork < liwmin && ! lquery) {
 	    *info = -12;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHEEVD", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

    if (*n == 1) {
 	i__1 = a_dim1 + 1;
 	w[1] = a[i__1].r;
 	if (wantz) {
 	    i__1 = a_dim1 + 1;
 	    a[i__1].r = 1.f, a[i__1].i = 0.f;
 	}
 	return 0;
    }

 /*     Get machine constants. */

    safmin = slamch_("Safe minimum");
    eps = slamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1.f / smlnum;
    rmin = sqrt(smlnum);
    rmax = sqrt(bignum);

 /*     Scale matrix to allowable range, if necessary. */

    anrm = clanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]);
    iscale = 0;
    if (anrm > 0.f && anrm < rmin) {
 	iscale = 1;
 	sigma = rmin / anrm;
    } else if (anrm > rmax) {
 	iscale = 1;
 	sigma = rmax / anrm;
    }
    if (iscale == 1) {
 	clascl_(uplo, &c__0, &c__0, &c_b18, &sigma, n, n, &a[a_offset], lda, 
 		info);
    }

 /*     Call CHETRD to reduce Hermitian matrix to tridiagonal form. */

    inde = 1;
    indtau = 1;
    indwrk = indtau + *n;
    indrwk = inde + *n;
    indwk2 = indwrk + *n * *n;
    llwork = *lwork - indwrk + 1;
    llwrk2 = *lwork - indwk2 + 1;
    llrwk = *lrwork - indrwk + 1;
    chetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], &
 	    work[indwrk], &llwork, &iinfo);

 /*     For eigenvalues only, call SSTERF.  For eigenvectors, first call */
 /*     CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the */
 /*     tridiagonal matrix, then call CUNMTR to multiply it to the */
 /*     Householder transformations represented as Householder vectors in */
 /*     A. */

    if (! wantz) {
 	ssterf_(n, &w[1], &rwork[inde], info);
    } else {
 	cstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], 
 		&llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info);
 	cunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[
 		indwrk], n, &work[indwk2], &llwrk2, &iinfo);
 	clacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda);
    }

 /*     If matrix was scaled, then rescale eigenvalues appropriately. */

    if (iscale == 1) {
 	if (*info == 0) {
 	    imax = *n;
 	} else {
 	    imax = *info - 1;
 	}
 	r__1 = 1.f / sigma;
 	sscal_(&imax, &r__1, &w[1], &c__1);
    }

    work[1].r = (real) lopt, work[1].i = 0.f;
    rwork[1] = (real) lropt;
    iwork[1] = liopt;

    return 0;

 /*     End of CHEEVD */

 } /* cheevd_ */

--- a/lapack-netlib/SRC/cheevd_2stage.c
+++ b/lapack-netlib/SRC/cheevd_2stage.c
@@ -0,0 +1,883 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__2 = 2;
 static integer c__3 = 3;
 static integer c__4 = 4;
 static integer c__0 = 0;
 static real c_b28 = 1.f;

 /* > \brief <b> CHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for
 HE matrices</b> */

 /*  @generated from zheevd_2stage.f, fortran z -> c, Sat Nov  5 23:18:14 2016 */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHEEVD_2STAGE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cheevd_
 2stage.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cheevd_
 2stage.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cheevd_
 2stage.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, */
 /*                          RWORK, LRWORK, IWORK, LIWORK, INFO ) */

 /*       IMPLICIT NONE */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, LDA, LIWORK, LRWORK, LWORK, N */
 /*       INTEGER            IWORK( * ) */
 /*       REAL               RWORK( * ), W( * ) */
 /*       COMPLEX            A( LDA, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHEEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a */
 /* > complex Hermitian matrix A using the 2stage technique for */
 /* > the reduction to tridiagonal.  If eigenvectors are desired, it uses a */
 /* > divide and conquer algorithm. */
 /* > */
 /* > The divide and conquer algorithm makes very mild assumptions about */
 /* > floating point arithmetic. It will work on machines with a guard */
 /* > digit in add/subtract, or on those binary machines without guard */
 /* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
 /* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */
 /* > without guard digits, but we know of none. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* >                  Not available in this release. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA, N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of A contains the */
 /* >          upper triangular part of the matrix A.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of A contains */
 /* >          the lower triangular part of the matrix A. */
 /* >          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
 /* >          orthonormal eigenvectors of the matrix A. */
 /* >          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */
 /* >          or the upper triangle (if UPLO='U') of A, including the */
 /* >          diagonal, is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is REAL array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If N <= 1,               LWORK must be at least 1. */
 /* >          If JOBZ = 'N' and N > 1, LWORK must be queried. */
 /* >                                   LWORK = MAX(1, dimension) where */
 /* >                                   dimension = f2cmax(stage1,stage2) + (KD+1)*N + N+1 */
 /* >                                             = N*KD + N*f2cmax(KD+1,FACTOPTNB) */
 /* >                                               + f2cmax(2*KD*KD, KD*NTHREADS) */
 /* >                                               + (KD+1)*N + N+1 */
 /* >                                   where KD is the blocking size of the reduction, */
 /* >                                   FACTOPTNB is the blocking used by the QR or LQ */
 /* >                                   algorithm, usually FACTOPTNB=128 is a good choice */
 /* >                                   NTHREADS is the number of threads used when */
 /* >                                   openMP compilation is enabled, otherwise =1. */
 /* >          If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2 */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal sizes of the WORK, RWORK and */
 /* >          IWORK arrays, returns these values as the first entries of */
 /* >          the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, */
 /* >                                         dimension (LRWORK) */
 /* >          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LRWORK */
 /* > \verbatim */
 /* >          LRWORK is INTEGER */
 /* >          The dimension of the array RWORK. */
 /* >          If N <= 1,                LRWORK must be at least 1. */
 /* >          If JOBZ  = 'N' and N > 1, LRWORK must be at least N. */
 /* >          If JOBZ  = 'V' and N > 1, LRWORK must be at least */
 /* >                         1 + 5*N + 2*N**2. */
 /* > */
 /* >          If LRWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */
 /* >          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LIWORK */
 /* > \verbatim */
 /* >          LIWORK is INTEGER */
 /* >          The dimension of the array IWORK. */
 /* >          If N <= 1,                LIWORK must be at least 1. */
 /* >          If JOBZ  = 'N' and N > 1, LIWORK must be at least 1. */
 /* >          If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N. */
 /* > */
 /* >          If LIWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i and JOBZ = 'N', then the algorithm failed */
 /* >                to converge; i off-diagonal elements of an intermediate */
 /* >                tridiagonal form did not converge to zero; */
 /* >                if INFO = i and JOBZ = 'V', then the algorithm failed */
 /* >                to compute an eigenvalue while working on the submatrix */
 /* >                lying in rows and columns INFO/(N+1) through */
 /* >                mod(INFO,N+1). */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complexHEeigen */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* >  Modified description of INFO. Sven, 16 Feb 05. */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* > Jeff Rutter, Computer Science Division, University of California */
 /* > at Berkeley, USA */
 /* > */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  All details about the 2stage techniques are available in: */
 /* > */
 /* >  Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */
 /* >  Parallel reduction to condensed forms for symmetric eigenvalue problems */
 /* >  using aggregated fine-grained and memory-aware kernels. In Proceedings */
 /* >  of 2011 International Conference for High Performance Computing, */
 /* >  Networking, Storage and Analysis (SC '11), New York, NY, USA, */
 /* >  Article 8 , 11 pages. */
 /* >  http://doi.acm.org/10.1145/2063384.2063394 */
 /* > */
 /* >  A. Haidar, J. Kurzak, P. Luszczek, 2013. */
 /* >  An improved parallel singular value algorithm and its implementation */
 /* >  for multicore hardware, In Proceedings of 2013 International Conference */
 /* >  for High Performance Computing, Networking, Storage and Analysis (SC '13). */
 /* >  Denver, Colorado, USA, 2013. */
 /* >  Article 90, 12 pages. */
 /* >  http://doi.acm.org/10.1145/2503210.2503292 */
 /* > */
 /* >  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */
 /* >  A novel hybrid CPU-GPU generalized eigensolver for electronic structure */
 /* >  calculations based on fine-grained memory aware tasks. */
 /* >  International Journal of High Performance Computing Applications. */
 /* >  Volume 28 Issue 2, Pages 196-209, May 2014. */
 /* >  http://hpc.sagepub.com/content/28/2/196 */
 /* > */
 /* > \endverbatim */

 /*  ===================================================================== */
 /* Subroutine */ int cheevd_2stage_(char *jobz, char *uplo, integer *n, 
 	complex *a, integer *lda, real *w, complex *work, integer *lwork, 
 	real *rwork, integer *lrwork, integer *iwork, integer *liwork, 
 	integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1;
    real r__1;

    /* Local variables */
    integer inde;
    extern integer ilaenv2stage_(integer *, char *, char *, integer *, 
 	    integer *, integer *, integer *);
    real anrm;
    integer imax;
    real rmin, rmax;
    extern /* Subroutine */ int chetrd_2stage_(char *, char *, integer *, 
 	    complex *, integer *, real *, real *, complex *, complex *, 
 	    integer *, complex *, integer *, integer *);
    real sigma;
    extern logical lsame_(char *, char *);
    integer iinfo;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    integer lhtrd, lwmin;
    logical lower;
    integer llrwk, lwtrd;
    logical wantz;
    integer indwk2, ib, llwrk2, kd;
    extern real clanhe_(char *, char *, integer *, complex *, integer *, real 
 	    *);
    integer iscale;
    extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, 
 	    real *, integer *, integer *, complex *, integer *, integer *), cstedc_(char *, integer *, real *, real *, complex *, 
 	    integer *, complex *, integer *, real *, integer *, integer *, 
 	    integer *, integer *);
    extern real slamch_(char *);
    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex 
 	    *, integer *, complex *, integer *);
    real safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    real bignum;
    integer indtau, indrwk, indwrk, liwmin;
    extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *);
    integer lrwmin;
    extern /* Subroutine */ int cunmtr_(char *, char *, char *, integer *, 
 	    integer *, complex *, integer *, complex *, complex *, integer *, 
 	    complex *, integer *, integer *);
    integer llwork;
    real smlnum;
    logical lquery;
    real eps;
    integer indhous;



 /*  -- LAPACK driver routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --w;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    lower = lsame_(uplo, "L");
    lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1;

    *info = 0;
    if (! lsame_(jobz, "N")) {
 	*info = -1;
    } else if (! (lower || lsame_(uplo, "U"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    }

    if (*info == 0) {
 	if (*n <= 1) {
 	    lwmin = 1;
 	    lrwmin = 1;
 	    liwmin = 1;
 	} else {
 	    kd = ilaenv2stage_(&c__1, "CHETRD_2STAGE", jobz, n, &c_n1, &c_n1, 
 		    &c_n1);
 	    ib = ilaenv2stage_(&c__2, "CHETRD_2STAGE", jobz, n, &kd, &c_n1, &
 		    c_n1);
 	    lhtrd = ilaenv2stage_(&c__3, "CHETRD_2STAGE", jobz, n, &kd, &ib, &
 		    c_n1);
 	    lwtrd = ilaenv2stage_(&c__4, "CHETRD_2STAGE", jobz, n, &kd, &ib, &
 		    c_n1);
 	    if (wantz) {
 		lwmin = (*n << 1) + *n * *n;
 /* Computing 2nd power */
 		i__1 = *n;
 		lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1);
 		liwmin = *n * 5 + 3;
 	    } else {
 		lwmin = *n + 1 + lhtrd + lwtrd;
 		lrwmin = *n;
 		liwmin = 1;
 	    }
 	}
 	work[1].r = (real) lwmin, work[1].i = 0.f;
 	rwork[1] = (real) lrwmin;
 	iwork[1] = liwmin;

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -8;
 	} else if (*lrwork < lrwmin && ! lquery) {
 	    *info = -10;
 	} else if (*liwork < liwmin && ! lquery) {
 	    *info = -12;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHEEVD_2STAGE", &i__1, (ftnlen)13);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

    if (*n == 1) {
 	i__1 = a_dim1 + 1;
 	w[1] = a[i__1].r;
 	if (wantz) {
 	    i__1 = a_dim1 + 1;
 	    a[i__1].r = 1.f, a[i__1].i = 0.f;
 	}
 	return 0;
    }

 /*     Get machine constants. */

    safmin = slamch_("Safe minimum");
    eps = slamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1.f / smlnum;
    rmin = sqrt(smlnum);
    rmax = sqrt(bignum);

 /*     Scale matrix to allowable range, if necessary. */

    anrm = clanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]);
    iscale = 0;
    if (anrm > 0.f && anrm < rmin) {
 	iscale = 1;
 	sigma = rmin / anrm;
    } else if (anrm > rmax) {
 	iscale = 1;
 	sigma = rmax / anrm;
    }
    if (iscale == 1) {
 	clascl_(uplo, &c__0, &c__0, &c_b28, &sigma, n, n, &a[a_offset], lda, 
 		info);
    }

 /*     Call CHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. */

    inde = 1;
    indrwk = inde + *n;
    llrwk = *lrwork - indrwk + 1;
    indtau = 1;
    indhous = indtau + *n;
    indwrk = indhous + lhtrd;
    llwork = *lwork - indwrk + 1;
    indwk2 = indwrk + *n * *n;
    llwrk2 = *lwork - indwk2 + 1;

    chetrd_2stage_(jobz, uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &
 	    work[indtau], &work[indhous], &lhtrd, &work[indwrk], &llwork, &
 	    iinfo);

 /*     For eigenvalues only, call SSTERF.  For eigenvectors, first call */
 /*     CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the */
 /*     tridiagonal matrix, then call CUNMTR to multiply it to the */
 /*     Householder transformations represented as Householder vectors in */
 /*     A. */

    if (! wantz) {
 	ssterf_(n, &w[1], &rwork[inde], info);
    } else {
 	cstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], 
 		&llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info);
 	cunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[
 		indwrk], n, &work[indwk2], &llwrk2, &iinfo);
 	clacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda);
    }

 /*     If matrix was scaled, then rescale eigenvalues appropriately. */

    if (iscale == 1) {
 	if (*info == 0) {
 	    imax = *n;
 	} else {
 	    imax = *info - 1;
 	}
 	r__1 = 1.f / sigma;
 	sscal_(&imax, &r__1, &w[1], &c__1);
    }

    work[1].r = (real) lwmin, work[1].i = 0.f;
    rwork[1] = (real) lrwmin;
    iwork[1] = liwmin;

    return 0;

 /*     End of CHEEVD_2STAGE */

 } /* cheevd_2stage__ */

--- a/lapack-netlib/SRC/cheevr.c
+++ b/lapack-netlib/SRC/cheevr.c
--- a/lapack-netlib/SRC/cheevr_2stage.c
+++ b/lapack-netlib/SRC/cheevr_2stage.c
--- a/lapack-netlib/SRC/cheevx.c
+++ b/lapack-netlib/SRC/cheevx.c
--- a/lapack-netlib/SRC/cheevx_2stage.c
+++ b/lapack-netlib/SRC/cheevx_2stage.c
--- a/lapack-netlib/SRC/chegs2.c
+++ b/lapack-netlib/SRC/chegs2.c
@@ -0,0 +1,767 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static complex c_b1 = {1.f,0.f};
 static integer c__1 = 1;

 /* > \brief \b CHEGS2 reduces a Hermitian definite generalized eigenproblem to standard form, using the factor
 ization results obtained from cpotrf (unblocked algorithm). */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHEGS2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chegs2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chegs2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chegs2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, ITYPE, LDA, LDB, N */
 /*       COMPLEX            A( LDA, * ), B( LDB, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHEGS2 reduces a complex Hermitian-definite generalized */
 /* > eigenproblem to standard form. */
 /* > */
 /* > If ITYPE = 1, the problem is A*x = lambda*B*x, */
 /* > and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) */
 /* > */
 /* > If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */
 /* > B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H *A*L. */
 /* > */
 /* > B must have been previously factorized as U**H *U or L*L**H by ZPOTRF. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] ITYPE */
 /* > \verbatim */
 /* >          ITYPE is INTEGER */
 /* >          = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); */
 /* >          = 2 or 3: compute U*A*U**H or L**H *A*L. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          Specifies whether the upper or lower triangular part of the */
 /* >          Hermitian matrix A is stored, and how B has been factorized. */
 /* >          = 'U':  Upper triangular */
 /* >          = 'L':  Lower triangular */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          n by n upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading n by n lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* > */
 /* >          On exit, if INFO = 0, the transformed matrix, stored in the */
 /* >          same format as A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,N) */
 /* >          The triangular factor from the Cholesky factorization of B, */
 /* >          as returned by CPOTRF. */
 /* >          B is modified by the routine but restored on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexHEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int chegs2_(integer *itype, char *uplo, integer *n, complex *
 	a, integer *lda, complex *b, integer *ldb, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
    real r__1, r__2;
    complex q__1;

    /* Local variables */
    extern /* Subroutine */ int cher2_(char *, integer *, complex *, complex *
 	    , integer *, complex *, integer *, complex *, integer *);
    integer k;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, 
 	    integer *, complex *, integer *);
    logical upper;
    extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *, 
 	    complex *, integer *, complex *, integer *), ctrsv_(char *, char *, char *, integer *, complex *, 
 	    integer *, complex *, integer *);
    complex ct;
    extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), 
 	    csscal_(integer *, real *, complex *, integer *), xerbla_(char *, 
 	    integer *, ftnlen);
    real akk, bkk;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (*itype < 1 || *itype > 3) {
 	*info = -1;
    } else if (! upper && ! lsame_(uplo, "L")) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHEGS2", &i__1, (ftnlen)6);
 	return 0;
    }

    if (*itype == 1) {
 	if (upper) {

 /*           Compute inv(U**H)*A*inv(U) */

 	    i__1 = *n;
 	    for (k = 1; k <= i__1; ++k) {

 /*              Update the upper triangle of A(k:n,k:n) */

 		i__2 = k + k * a_dim1;
 		akk = a[i__2].r;
 		i__2 = k + k * b_dim1;
 		bkk = b[i__2].r;
 /* Computing 2nd power */
 		r__1 = bkk;
 		akk /= r__1 * r__1;
 		i__2 = k + k * a_dim1;
 		a[i__2].r = akk, a[i__2].i = 0.f;
 		if (k < *n) {
 		    i__2 = *n - k;
 		    r__1 = 1.f / bkk;
 		    csscal_(&i__2, &r__1, &a[k + (k + 1) * a_dim1], lda);
 		    r__1 = akk * -.5f;
 		    ct.r = r__1, ct.i = 0.f;
 		    i__2 = *n - k;
 		    clacgv_(&i__2, &a[k + (k + 1) * a_dim1], lda);
 		    i__2 = *n - k;
 		    clacgv_(&i__2, &b[k + (k + 1) * b_dim1], ldb);
 		    i__2 = *n - k;
 		    caxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + (
 			    k + 1) * a_dim1], lda);
 		    i__2 = *n - k;
 		    q__1.r = -1.f, q__1.i = 0.f;
 		    cher2_(uplo, &i__2, &q__1, &a[k + (k + 1) * a_dim1], lda, 
 			    &b[k + (k + 1) * b_dim1], ldb, &a[k + 1 + (k + 1) 
 			    * a_dim1], lda);
 		    i__2 = *n - k;
 		    caxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + (
 			    k + 1) * a_dim1], lda);
 		    i__2 = *n - k;
 		    clacgv_(&i__2, &b[k + (k + 1) * b_dim1], ldb);
 		    i__2 = *n - k;
 		    ctrsv_(uplo, "Conjugate transpose", "Non-unit", &i__2, &b[
 			    k + 1 + (k + 1) * b_dim1], ldb, &a[k + (k + 1) * 
 			    a_dim1], lda);
 		    i__2 = *n - k;
 		    clacgv_(&i__2, &a[k + (k + 1) * a_dim1], lda);
 		}
 /* L10: */
 	    }
 	} else {

 /*           Compute inv(L)*A*inv(L**H) */

 	    i__1 = *n;
 	    for (k = 1; k <= i__1; ++k) {

 /*              Update the lower triangle of A(k:n,k:n) */

 		i__2 = k + k * a_dim1;
 		akk = a[i__2].r;
 		i__2 = k + k * b_dim1;
 		bkk = b[i__2].r;
 /* Computing 2nd power */
 		r__1 = bkk;
 		akk /= r__1 * r__1;
 		i__2 = k + k * a_dim1;
 		a[i__2].r = akk, a[i__2].i = 0.f;
 		if (k < *n) {
 		    i__2 = *n - k;
 		    r__1 = 1.f / bkk;
 		    csscal_(&i__2, &r__1, &a[k + 1 + k * a_dim1], &c__1);
 		    r__1 = akk * -.5f;
 		    ct.r = r__1, ct.i = 0.f;
 		    i__2 = *n - k;
 		    caxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + 
 			    1 + k * a_dim1], &c__1);
 		    i__2 = *n - k;
 		    q__1.r = -1.f, q__1.i = 0.f;
 		    cher2_(uplo, &i__2, &q__1, &a[k + 1 + k * a_dim1], &c__1, 
 			    &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + (k + 1) 
 			    * a_dim1], lda);
 		    i__2 = *n - k;
 		    caxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + 
 			    1 + k * a_dim1], &c__1);
 		    i__2 = *n - k;
 		    ctrsv_(uplo, "No transpose", "Non-unit", &i__2, &b[k + 1 
 			    + (k + 1) * b_dim1], ldb, &a[k + 1 + k * a_dim1], 
 			    &c__1);
 		}
 /* L20: */
 	    }
 	}
    } else {
 	if (upper) {

 /*           Compute U*A*U**H */

 	    i__1 = *n;
 	    for (k = 1; k <= i__1; ++k) {

 /*              Update the upper triangle of A(1:k,1:k) */

 		i__2 = k + k * a_dim1;
 		akk = a[i__2].r;
 		i__2 = k + k * b_dim1;
 		bkk = b[i__2].r;
 		i__2 = k - 1;
 		ctrmv_(uplo, "No transpose", "Non-unit", &i__2, &b[b_offset], 
 			ldb, &a[k * a_dim1 + 1], &c__1);
 		r__1 = akk * .5f;
 		ct.r = r__1, ct.i = 0.f;
 		i__2 = k - 1;
 		caxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + 
 			1], &c__1);
 		i__2 = k - 1;
 		cher2_(uplo, &i__2, &c_b1, &a[k * a_dim1 + 1], &c__1, &b[k * 
 			b_dim1 + 1], &c__1, &a[a_offset], lda);
 		i__2 = k - 1;
 		caxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + 
 			1], &c__1);
 		i__2 = k - 1;
 		csscal_(&i__2, &bkk, &a[k * a_dim1 + 1], &c__1);
 		i__2 = k + k * a_dim1;
 /* Computing 2nd power */
 		r__2 = bkk;
 		r__1 = akk * (r__2 * r__2);
 		a[i__2].r = r__1, a[i__2].i = 0.f;
 /* L30: */
 	    }
 	} else {

 /*           Compute L**H *A*L */

 	    i__1 = *n;
 	    for (k = 1; k <= i__1; ++k) {

 /*              Update the lower triangle of A(1:k,1:k) */

 		i__2 = k + k * a_dim1;
 		akk = a[i__2].r;
 		i__2 = k + k * b_dim1;
 		bkk = b[i__2].r;
 		i__2 = k - 1;
 		clacgv_(&i__2, &a[k + a_dim1], lda);
 		i__2 = k - 1;
 		ctrmv_(uplo, "Conjugate transpose", "Non-unit", &i__2, &b[
 			b_offset], ldb, &a[k + a_dim1], lda);
 		r__1 = akk * .5f;
 		ct.r = r__1, ct.i = 0.f;
 		i__2 = k - 1;
 		clacgv_(&i__2, &b[k + b_dim1], ldb);
 		i__2 = k - 1;
 		caxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda);
 		i__2 = k - 1;
 		cher2_(uplo, &i__2, &c_b1, &a[k + a_dim1], lda, &b[k + b_dim1]
 			, ldb, &a[a_offset], lda);
 		i__2 = k - 1;
 		caxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda);
 		i__2 = k - 1;
 		clacgv_(&i__2, &b[k + b_dim1], ldb);
 		i__2 = k - 1;
 		csscal_(&i__2, &bkk, &a[k + a_dim1], lda);
 		i__2 = k - 1;
 		clacgv_(&i__2, &a[k + a_dim1], lda);
 		i__2 = k + k * a_dim1;
 /* Computing 2nd power */
 		r__2 = bkk;
 		r__1 = akk * (r__2 * r__2);
 		a[i__2].r = r__1, a[i__2].i = 0.f;
 /* L40: */
 	    }
 	}
    }
    return 0;

 /*     End of CHEGS2 */

 } /* chegs2_ */

--- a/lapack-netlib/SRC/chegst.c
+++ b/lapack-netlib/SRC/chegst.c
@@ -0,0 +1,784 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static complex c_b1 = {1.f,0.f};
 static complex c_b2 = {.5f,0.f};
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static real c_b18 = 1.f;

 /* > \brief \b CHEGST */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHEGST + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chegst.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chegst.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chegst.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, ITYPE, LDA, LDB, N */
 /*       COMPLEX            A( LDA, * ), B( LDB, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHEGST reduces a complex Hermitian-definite generalized */
 /* > eigenproblem to standard form. */
 /* > */
 /* > If ITYPE = 1, the problem is A*x = lambda*B*x, */
 /* > and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) */
 /* > */
 /* > If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */
 /* > B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. */
 /* > */
 /* > B must have been previously factorized as U**H*U or L*L**H by CPOTRF. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] ITYPE */
 /* > \verbatim */
 /* >          ITYPE is INTEGER */
 /* >          = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); */
 /* >          = 2 or 3: compute U*A*U**H or L**H*A*L. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored and B is factored as */
 /* >                  U**H*U; */
 /* >          = 'L':  Lower triangle of A is stored and B is factored as */
 /* >                  L*L**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          N-by-N upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading N-by-N lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* > */
 /* >          On exit, if INFO = 0, the transformed matrix, stored in the */
 /* >          same format as A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,N) */
 /* >          The triangular factor from the Cholesky factorization of B, */
 /* >          as returned by CPOTRF. */
 /* >          B is modified by the routine but restored on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexHEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int chegst_(integer *itype, char *uplo, integer *n, complex *
 	a, integer *lda, complex *b, integer *ldb, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
    complex q__1;

    /* Local variables */
    integer k;
    extern /* Subroutine */ int chemm_(char *, char *, integer *, integer *, 
 	    complex *, complex *, integer *, complex *, integer *, complex *, 
 	    complex *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, 
 	    integer *, integer *, complex *, complex *, integer *, complex *, 
 	    integer *), ctrsm_(char *, char *,
 	     char *, char *, integer *, integer *, complex *, complex *, 
 	    integer *, complex *, integer *);
    logical upper;
    extern /* Subroutine */ int chegs2_(integer *, char *, integer *, complex 
 	    *, integer *, complex *, integer *, integer *), cher2k_(
 	    char *, char *, integer *, integer *, complex *, complex *, 
 	    integer *, complex *, integer *, real *, complex *, integer *);
    integer kb, nb;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (*itype < 1 || *itype > 3) {
 	*info = -1;
    } else if (! upper && ! lsame_(uplo, "L")) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHEGST", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "CHEGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);

    if (nb <= 1 || nb >= *n) {

 /*        Use unblocked code */

 	chegs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    } else {

 /*        Use blocked code */

 	if (*itype == 1) {
 	    if (upper) {

 /*              Compute inv(U**H)*A*inv(U) */

 		i__1 = *n;
 		i__2 = nb;
 		for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
 /* Computing MIN */
 		    i__3 = *n - k + 1;
 		    kb = f2cmin(i__3,nb);

 /*                 Update the upper triangle of A(k:n,k:n) */

 		    chegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + 
 			    k * b_dim1], ldb, info);
 		    if (k + kb <= *n) {
 			i__3 = *n - k - kb + 1;
 			ctrsm_("Left", uplo, "Conjugate transpose", "Non-unit"
 				, &kb, &i__3, &c_b1, &b[k + k * b_dim1], ldb, 
 				&a[k + (k + kb) * a_dim1], lda);
 			i__3 = *n - k - kb + 1;
 			q__1.r = -.5f, q__1.i = 0.f;
 			chemm_("Left", uplo, &kb, &i__3, &q__1, &a[k + k * 
 				a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, 
 				&c_b1, &a[k + (k + kb) * a_dim1], lda);
 			i__3 = *n - k - kb + 1;
 			q__1.r = -1.f, q__1.i = 0.f;
 			cher2k_(uplo, "Conjugate transpose", &i__3, &kb, &
 				q__1, &a[k + (k + kb) * a_dim1], lda, &b[k + (
 				k + kb) * b_dim1], ldb, &c_b18, &a[k + kb + (
 				k + kb) * a_dim1], lda)
 				;
 			i__3 = *n - k - kb + 1;
 			q__1.r = -.5f, q__1.i = 0.f;
 			chemm_("Left", uplo, &kb, &i__3, &q__1, &a[k + k * 
 				a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, 
 				&c_b1, &a[k + (k + kb) * a_dim1], lda);
 			i__3 = *n - k - kb + 1;
 			ctrsm_("Right", uplo, "No transpose", "Non-unit", &kb,
 				 &i__3, &c_b1, &b[k + kb + (k + kb) * b_dim1],
 				 ldb, &a[k + (k + kb) * a_dim1], lda);
 		    }
 /* L10: */
 		}
 	    } else {

 /*              Compute inv(L)*A*inv(L**H) */

 		i__2 = *n;
 		i__1 = nb;
 		for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
 /* Computing MIN */
 		    i__3 = *n - k + 1;
 		    kb = f2cmin(i__3,nb);

 /*                 Update the lower triangle of A(k:n,k:n) */

 		    chegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + 
 			    k * b_dim1], ldb, info);
 		    if (k + kb <= *n) {
 			i__3 = *n - k - kb + 1;
 			ctrsm_("Right", uplo, "Conjugate transpose", "Non-un"
 				"it", &i__3, &kb, &c_b1, &b[k + k * b_dim1], 
 				ldb, &a[k + kb + k * a_dim1], lda);
 			i__3 = *n - k - kb + 1;
 			q__1.r = -.5f, q__1.i = 0.f;
 			chemm_("Right", uplo, &i__3, &kb, &q__1, &a[k + k * 
 				a_dim1], lda, &b[k + kb + k * b_dim1], ldb, &
 				c_b1, &a[k + kb + k * a_dim1], lda);
 			i__3 = *n - k - kb + 1;
 			q__1.r = -1.f, q__1.i = 0.f;
 			cher2k_(uplo, "No transpose", &i__3, &kb, &q__1, &a[k 
 				+ kb + k * a_dim1], lda, &b[k + kb + k * 
 				b_dim1], ldb, &c_b18, &a[k + kb + (k + kb) * 
 				a_dim1], lda);
 			i__3 = *n - k - kb + 1;
 			q__1.r = -.5f, q__1.i = 0.f;
 			chemm_("Right", uplo, &i__3, &kb, &q__1, &a[k + k * 
 				a_dim1], lda, &b[k + kb + k * b_dim1], ldb, &
 				c_b1, &a[k + kb + k * a_dim1], lda);
 			i__3 = *n - k - kb + 1;
 			ctrsm_("Left", uplo, "No transpose", "Non-unit", &
 				i__3, &kb, &c_b1, &b[k + kb + (k + kb) * 
 				b_dim1], ldb, &a[k + kb + k * a_dim1], lda);
 		    }
 /* L20: */
 		}
 	    }
 	} else {
 	    if (upper) {

 /*              Compute U*A*U**H */

 		i__1 = *n;
 		i__2 = nb;
 		for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
 /* Computing MIN */
 		    i__3 = *n - k + 1;
 		    kb = f2cmin(i__3,nb);

 /*                 Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */

 		    i__3 = k - 1;
 		    ctrmm_("Left", uplo, "No transpose", "Non-unit", &i__3, &
 			    kb, &c_b1, &b[b_offset], ldb, &a[k * a_dim1 + 1], 
 			    lda);
 		    i__3 = k - 1;
 		    chemm_("Right", uplo, &i__3, &kb, &c_b2, &a[k + k * 
 			    a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b1, &a[
 			    k * a_dim1 + 1], lda);
 		    i__3 = k - 1;
 		    cher2k_(uplo, "No transpose", &i__3, &kb, &c_b1, &a[k * 
 			    a_dim1 + 1], lda, &b[k * b_dim1 + 1], ldb, &c_b18,
 			     &a[a_offset], lda);
 		    i__3 = k - 1;
 		    chemm_("Right", uplo, &i__3, &kb, &c_b2, &a[k + k * 
 			    a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b1, &a[
 			    k * a_dim1 + 1], lda);
 		    i__3 = k - 1;
 		    ctrmm_("Right", uplo, "Conjugate transpose", "Non-unit", &
 			    i__3, &kb, &c_b1, &b[k + k * b_dim1], ldb, &a[k * 
 			    a_dim1 + 1], lda);
 		    chegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + 
 			    k * b_dim1], ldb, info);
 /* L30: */
 		}
 	    } else {

 /*              Compute L**H*A*L */

 		i__2 = *n;
 		i__1 = nb;
 		for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
 /* Computing MIN */
 		    i__3 = *n - k + 1;
 		    kb = f2cmin(i__3,nb);

 /*                 Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */

 		    i__3 = k - 1;
 		    ctrmm_("Right", uplo, "No transpose", "Non-unit", &kb, &
 			    i__3, &c_b1, &b[b_offset], ldb, &a[k + a_dim1], 
 			    lda);
 		    i__3 = k - 1;
 		    chemm_("Left", uplo, &kb, &i__3, &c_b2, &a[k + k * a_dim1]
 			    , lda, &b[k + b_dim1], ldb, &c_b1, &a[k + a_dim1],
 			     lda);
 		    i__3 = k - 1;
 		    cher2k_(uplo, "Conjugate transpose", &i__3, &kb, &c_b1, &
 			    a[k + a_dim1], lda, &b[k + b_dim1], ldb, &c_b18, &
 			    a[a_offset], lda);
 		    i__3 = k - 1;
 		    chemm_("Left", uplo, &kb, &i__3, &c_b2, &a[k + k * a_dim1]
 			    , lda, &b[k + b_dim1], ldb, &c_b1, &a[k + a_dim1],
 			     lda);
 		    i__3 = k - 1;
 		    ctrmm_("Left", uplo, "Conjugate transpose", "Non-unit", &
 			    kb, &i__3, &c_b1, &b[k + k * b_dim1], ldb, &a[k + 
 			    a_dim1], lda);
 		    chegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + 
 			    k * b_dim1], ldb, info);
 /* L40: */
 		}
 	    }
 	}
    }
    return 0;

 /*     End of CHEGST */

 } /* chegst_ */

--- a/lapack-netlib/SRC/chegv.c
+++ b/lapack-netlib/SRC/chegv.c
@@ -0,0 +1,735 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static complex c_b1 = {1.f,0.f};
 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b CHEGV */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHEGV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chegv.f
 "> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chegv.f
 "> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chegv.f
 "> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHEGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, */
 /*                         LWORK, RWORK, INFO ) */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, ITYPE, LDA, LDB, LWORK, N */
 /*       REAL               RWORK( * ), W( * ) */
 /*       COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHEGV computes all the eigenvalues, and optionally, the eigenvectors */
 /* > of a complex generalized Hermitian-definite eigenproblem, of the form */
 /* > A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x. */
 /* > Here A and B are assumed to be Hermitian and B is also */
 /* > positive definite. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] ITYPE */
 /* > \verbatim */
 /* >          ITYPE is INTEGER */
 /* >          Specifies the problem type to be solved: */
 /* >          = 1:  A*x = (lambda)*B*x */
 /* >          = 2:  A*B*x = (lambda)*x */
 /* >          = 3:  B*A*x = (lambda)*x */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangles of A and B are stored; */
 /* >          = 'L':  Lower triangles of A and B are stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA, N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of A contains the */
 /* >          upper triangular part of the matrix A.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of A contains */
 /* >          the lower triangular part of the matrix A. */
 /* > */
 /* >          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
 /* >          matrix Z of eigenvectors.  The eigenvectors are normalized */
 /* >          as follows: */
 /* >          if ITYPE = 1 or 2, Z**H*B*Z = I; */
 /* >          if ITYPE = 3, Z**H*inv(B)*Z = I. */
 /* >          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */
 /* >          or the lower triangle (if UPLO='L') of A, including the */
 /* >          diagonal, is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB, N) */
 /* >          On entry, the Hermitian positive definite matrix B. */
 /* >          If UPLO = 'U', the leading N-by-N upper triangular part of B */
 /* >          contains the upper triangular part of the matrix B. */
 /* >          If UPLO = 'L', the leading N-by-N lower triangular part of B */
 /* >          contains the lower triangular part of the matrix B. */
 /* > */
 /* >          On exit, if INFO <= N, the part of B containing the matrix is */
 /* >          overwritten by the triangular factor U or L from the Cholesky */
 /* >          factorization B = U**H*U or B = L*L**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is REAL array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK.  LWORK >= f2cmax(1,2*N-1). */
 /* >          For optimal efficiency, LWORK >= (NB+1)*N, */
 /* >          where NB is the blocksize for CHETRD returned by ILAENV. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, dimension (f2cmax(1, 3*N-2)) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  CPOTRF or CHEEV returned an error code: */
 /* >             <= N:  if INFO = i, CHEEV failed to converge; */
 /* >                    i off-diagonal elements of an intermediate */
 /* >                    tridiagonal form did not converge to zero; */
 /* >             > N:   if INFO = N + i, for 1 <= i <= N, then the leading */
 /* >                    minor of order i of B is not positive definite. */
 /* >                    The factorization of B could not be completed and */
 /* >                    no eigenvalues or eigenvectors were computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexHEeigen */

 /*  ===================================================================== */
 /* Subroutine */ int chegv_(integer *itype, char *jobz, char *uplo, integer *
 	n, complex *a, integer *lda, complex *b, integer *ldb, real *w, 
 	complex *work, integer *lwork, real *rwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;

    /* Local variables */
    integer neig;
    extern /* Subroutine */ int cheev_(char *, char *, integer *, complex *, 
 	    integer *, real *, complex *, integer *, real *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, 
 	    integer *, integer *, complex *, complex *, integer *, complex *, 
 	    integer *);
    char trans[1];
    extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, 
 	    integer *, integer *, complex *, complex *, integer *, complex *, 
 	    integer *);
    logical upper, wantz;
    integer nb;
    extern /* Subroutine */ int chegst_(integer *, char *, integer *, complex 
 	    *, integer *, complex *, integer *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), cpotrf_(
 	    char *, integer *, complex *, integer *, integer *);
    integer lwkopt;
    logical lquery;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --w;
    --work;
    --rwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1;

    *info = 0;
    if (*itype < 1 || *itype > 3) {
 	*info = -1;
    } else if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -2;
    } else if (! (upper || lsame_(uplo, "L"))) {
 	*info = -3;
    } else if (*n < 0) {
 	*info = -4;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -6;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
    }

    if (*info == 0) {
 	nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
 		 (ftnlen)1);
 /* Computing MAX */
 	i__1 = 1, i__2 = (nb + 1) * *n;
 	lwkopt = f2cmax(i__1,i__2);
 	work[1].r = (real) lwkopt, work[1].i = 0.f;

 /* Computing MAX */
 	i__1 = 1, i__2 = (*n << 1) - 1;
 	if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
 	    *info = -11;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHEGV ", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Form a Cholesky factorization of B. */

    cpotrf_(uplo, n, &b[b_offset], ldb, info);
    if (*info != 0) {
 	*info = *n + *info;
 	return 0;
    }

 /*     Transform problem to standard eigenvalue problem and solve. */

    chegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    cheev_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &rwork[1]
 	    , info);

    if (wantz) {

 /*        Backtransform eigenvectors to the original problem. */

 	neig = *n;
 	if (*info > 0) {
 	    neig = *info - 1;
 	}
 	if (*itype == 1 || *itype == 2) {

 /*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */
 /*           backtransform eigenvectors: x = inv(L)**H*y or inv(U)*y */

 	    if (upper) {
 		*(unsigned char *)trans = 'N';
 	    } else {
 		*(unsigned char *)trans = 'C';
 	    }

 	    ctrsm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b1, &b[
 		    b_offset], ldb, &a[a_offset], lda);

 	} else if (*itype == 3) {

 /*           For B*A*x=(lambda)*x; */
 /*           backtransform eigenvectors: x = L*y or U**H*y */

 	    if (upper) {
 		*(unsigned char *)trans = 'C';
 	    } else {
 		*(unsigned char *)trans = 'N';
 	    }

 	    ctrmm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b1, &b[
 		    b_offset], ldb, &a[a_offset], lda);
 	}
    }

    work[1].r = (real) lwkopt, work[1].i = 0.f;

    return 0;

 /*     End of CHEGV */

 } /* chegv_ */

--- a/lapack-netlib/SRC/chegv_2stage.c
+++ b/lapack-netlib/SRC/chegv_2stage.c
@@ -0,0 +1,795 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static complex c_b1 = {1.f,0.f};
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__2 = 2;
 static integer c__3 = 3;
 static integer c__4 = 4;

 /* > \brief \b CHEGV_2STAGE */

 /*  @generated from zhegv_2stage.f, fortran z -> c, Sun Nov  6 13:09:52 2016 */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHEGV_2STAGE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chegv_2
 stage.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chegv_2
 stage.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chegv_2
 stage.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, */
 /*                                WORK, LWORK, RWORK, INFO ) */

 /*       IMPLICIT NONE */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, ITYPE, LDA, LDB, LWORK, N */
 /*       REAL               RWORK( * ), W( * ) */
 /*       COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHEGV_2STAGE computes all the eigenvalues, and optionally, the eigenvectors */
 /* > of a complex generalized Hermitian-definite eigenproblem, of the form */
 /* > A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x. */
 /* > Here A and B are assumed to be Hermitian and B is also */
 /* > positive definite. */
 /* > This routine use the 2stage technique for the reduction to tridiagonal */
 /* > which showed higher performance on recent architecture and for large */
 /* > sizes N>2000. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] ITYPE */
 /* > \verbatim */
 /* >          ITYPE is INTEGER */
 /* >          Specifies the problem type to be solved: */
 /* >          = 1:  A*x = (lambda)*B*x */
 /* >          = 2:  A*B*x = (lambda)*x */
 /* >          = 3:  B*A*x = (lambda)*x */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* >                  Not available in this release. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangles of A and B are stored; */
 /* >          = 'L':  Lower triangles of A and B are stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA, N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of A contains the */
 /* >          upper triangular part of the matrix A.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of A contains */
 /* >          the lower triangular part of the matrix A. */
 /* > */
 /* >          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
 /* >          matrix Z of eigenvectors.  The eigenvectors are normalized */
 /* >          as follows: */
 /* >          if ITYPE = 1 or 2, Z**H*B*Z = I; */
 /* >          if ITYPE = 3, Z**H*inv(B)*Z = I. */
 /* >          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */
 /* >          or the lower triangle (if UPLO='L') of A, including the */
 /* >          diagonal, is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB, N) */
 /* >          On entry, the Hermitian positive definite matrix B. */
 /* >          If UPLO = 'U', the leading N-by-N upper triangular part of B */
 /* >          contains the upper triangular part of the matrix B. */
 /* >          If UPLO = 'L', the leading N-by-N lower triangular part of B */
 /* >          contains the lower triangular part of the matrix B. */
 /* > */
 /* >          On exit, if INFO <= N, the part of B containing the matrix is */
 /* >          overwritten by the triangular factor U or L from the Cholesky */
 /* >          factorization B = U**H*U or B = L*L**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is REAL array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK. LWORK >= 1, when N <= 1; */
 /* >          otherwise */
 /* >          If JOBZ = 'N' and N > 1, LWORK must be queried. */
 /* >                                   LWORK = MAX(1, dimension) where */
 /* >                                   dimension = f2cmax(stage1,stage2) + (KD+1)*N + N */
 /* >                                             = N*KD + N*f2cmax(KD+1,FACTOPTNB) */
 /* >                                               + f2cmax(2*KD*KD, KD*NTHREADS) */
 /* >                                               + (KD+1)*N + N */
 /* >                                   where KD is the blocking size of the reduction, */
 /* >                                   FACTOPTNB is the blocking used by the QR or LQ */
 /* >                                   algorithm, usually FACTOPTNB=128 is a good choice */
 /* >                                   NTHREADS is the number of threads used when */
 /* >                                   openMP compilation is enabled, otherwise =1. */
 /* >          If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, dimension (f2cmax(1, 3*N-2)) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  CPOTRF or CHEEV returned an error code: */
 /* >             <= N:  if INFO = i, CHEEV failed to converge; */
 /* >                    i off-diagonal elements of an intermediate */
 /* >                    tridiagonal form did not converge to zero; */
 /* >             > N:   if INFO = N + i, for 1 <= i <= N, then the leading */
 /* >                    minor of order i of B is not positive definite. */
 /* >                    The factorization of B could not be completed and */
 /* >                    no eigenvalues or eigenvectors were computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complexHEeigen */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  All details about the 2stage techniques are available in: */
 /* > */
 /* >  Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */
 /* >  Parallel reduction to condensed forms for symmetric eigenvalue problems */
 /* >  using aggregated fine-grained and memory-aware kernels. In Proceedings */
 /* >  of 2011 International Conference for High Performance Computing, */
 /* >  Networking, Storage and Analysis (SC '11), New York, NY, USA, */
 /* >  Article 8 , 11 pages. */
 /* >  http://doi.acm.org/10.1145/2063384.2063394 */
 /* > */
 /* >  A. Haidar, J. Kurzak, P. Luszczek, 2013. */
 /* >  An improved parallel singular value algorithm and its implementation */
 /* >  for multicore hardware, In Proceedings of 2013 International Conference */
 /* >  for High Performance Computing, Networking, Storage and Analysis (SC '13). */
 /* >  Denver, Colorado, USA, 2013. */
 /* >  Article 90, 12 pages. */
 /* >  http://doi.acm.org/10.1145/2503210.2503292 */
 /* > */
 /* >  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */
 /* >  A novel hybrid CPU-GPU generalized eigensolver for electronic structure */
 /* >  calculations based on fine-grained memory aware tasks. */
 /* >  International Journal of High Performance Computing Applications. */
 /* >  Volume 28 Issue 2, Pages 196-209, May 2014. */
 /* >  http://hpc.sagepub.com/content/28/2/196 */
 /* > */
 /* > \endverbatim */

 /*  ===================================================================== */
 /* Subroutine */ int chegv_2stage_(integer *itype, char *jobz, char *uplo, 
 	integer *n, complex *a, integer *lda, complex *b, integer *ldb, real *
 	w, complex *work, integer *lwork, real *rwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;

    /* Local variables */
    integer neig;
    extern integer ilaenv2stage_(integer *, char *, char *, integer *, 
 	    integer *, integer *, integer *);
    extern logical lsame_(char *, char *);
    integer lhtrd;
    extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, 
 	    integer *, integer *, complex *, complex *, integer *, complex *, 
 	    integer *);
    integer lwmin;
    char trans[1];
    extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, 
 	    integer *, integer *, complex *, complex *, integer *, complex *, 
 	    integer *);
    logical upper;
    integer lwtrd;
    logical wantz;
    integer ib, kd;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), chegst_(
 	    integer *, char *, integer *, complex *, integer *, complex *, 
 	    integer *, integer *), cpotrf_(char *, integer *, complex 
 	    *, integer *, integer *);
    logical lquery;
    extern /* Subroutine */ int cheev_2stage_(char *, char *, integer *, 
 	    complex *, integer *, real *, complex *, integer *, real *, 
 	    integer *);



 /*  -- LAPACK driver routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --w;
    --work;
    --rwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1;

    *info = 0;
    if (*itype < 1 || *itype > 3) {
 	*info = -1;
    } else if (! lsame_(jobz, "N")) {
 	*info = -2;
    } else if (! (upper || lsame_(uplo, "L"))) {
 	*info = -3;
    } else if (*n < 0) {
 	*info = -4;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -6;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
    }

    if (*info == 0) {
 	kd = ilaenv2stage_(&c__1, "CHETRD_2STAGE", jobz, n, &c_n1, &c_n1, &
 		c_n1);
 	ib = ilaenv2stage_(&c__2, "CHETRD_2STAGE", jobz, n, &kd, &c_n1, &c_n1);
 	lhtrd = ilaenv2stage_(&c__3, "CHETRD_2STAGE", jobz, n, &kd, &ib, &
 		c_n1);
 	lwtrd = ilaenv2stage_(&c__4, "CHETRD_2STAGE", jobz, n, &kd, &ib, &
 		c_n1);
 	lwmin = *n + lhtrd + lwtrd;
 	work[1].r = (real) lwmin, work[1].i = 0.f;

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -11;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHEGV_2STAGE ", &i__1, (ftnlen)13);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Form a Cholesky factorization of B. */

    cpotrf_(uplo, n, &b[b_offset], ldb, info);
    if (*info != 0) {
 	*info = *n + *info;
 	return 0;
    }

 /*     Transform problem to standard eigenvalue problem and solve. */

    chegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    cheev_2stage_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &
 	    rwork[1], info);

    if (wantz) {

 /*        Backtransform eigenvectors to the original problem. */

 	neig = *n;
 	if (*info > 0) {
 	    neig = *info - 1;
 	}
 	if (*itype == 1 || *itype == 2) {

 /*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */
 /*           backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y */

 	    if (upper) {
 		*(unsigned char *)trans = 'N';
 	    } else {
 		*(unsigned char *)trans = 'C';
 	    }

 	    ctrsm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b1, &b[
 		    b_offset], ldb, &a[a_offset], lda);

 	} else if (*itype == 3) {

 /*           For B*A*x=(lambda)*x; */
 /*           backtransform eigenvectors: x = L*y or U**H *y */

 	    if (upper) {
 		*(unsigned char *)trans = 'C';
 	    } else {
 		*(unsigned char *)trans = 'N';
 	    }

 	    ctrmm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b1, &b[
 		    b_offset], ldb, &a[a_offset], lda);
 	}
    }

    work[1].r = (real) lwmin, work[1].i = 0.f;

    return 0;

 /*     End of CHEGV_2STAGE */

 } /* chegv_2stage__ */

--- a/lapack-netlib/SRC/chegvd.c
+++ b/lapack-netlib/SRC/chegvd.c
@@ -0,0 +1,827 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static complex c_b1 = {1.f,0.f};

 /* > \brief \b CHEGVD */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHEGVD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chegvd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chegvd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chegvd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, */
 /*                          LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, ITYPE, LDA, LDB, LIWORK, LRWORK, LWORK, N */
 /*       INTEGER            IWORK( * ) */
 /*       REAL               RWORK( * ), W( * ) */
 /*       COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHEGVD computes all the eigenvalues, and optionally, the eigenvectors */
 /* > of a complex generalized Hermitian-definite eigenproblem, of the form */
 /* > A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and */
 /* > B are assumed to be Hermitian and B is also positive definite. */
 /* > If eigenvectors are desired, it uses a divide and conquer algorithm. */
 /* > */
 /* > The divide and conquer algorithm makes very mild assumptions about */
 /* > floating point arithmetic. It will work on machines with a guard */
 /* > digit in add/subtract, or on those binary machines without guard */
 /* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
 /* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */
 /* > without guard digits, but we know of none. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] ITYPE */
 /* > \verbatim */
 /* >          ITYPE is INTEGER */
 /* >          Specifies the problem type to be solved: */
 /* >          = 1:  A*x = (lambda)*B*x */
 /* >          = 2:  A*B*x = (lambda)*x */
 /* >          = 3:  B*A*x = (lambda)*x */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangles of A and B are stored; */
 /* >          = 'L':  Lower triangles of A and B are stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA, N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of A contains the */
 /* >          upper triangular part of the matrix A.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of A contains */
 /* >          the lower triangular part of the matrix A. */
 /* > */
 /* >          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
 /* >          matrix Z of eigenvectors.  The eigenvectors are normalized */
 /* >          as follows: */
 /* >          if ITYPE = 1 or 2, Z**H*B*Z = I; */
 /* >          if ITYPE = 3, Z**H*inv(B)*Z = I. */
 /* >          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */
 /* >          or the lower triangle (if UPLO='L') of A, including the */
 /* >          diagonal, is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB, N) */
 /* >          On entry, the Hermitian matrix B.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of B contains the */
 /* >          upper triangular part of the matrix B.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of B contains */
 /* >          the lower triangular part of the matrix B. */
 /* > */
 /* >          On exit, if INFO <= N, the part of B containing the matrix is */
 /* >          overwritten by the triangular factor U or L from the Cholesky */
 /* >          factorization B = U**H*U or B = L*L**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is REAL array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK. */
 /* >          If N <= 1,                LWORK >= 1. */
 /* >          If JOBZ  = 'N' and N > 1, LWORK >= N + 1. */
 /* >          If JOBZ  = 'V' and N > 1, LWORK >= 2*N + N**2. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal sizes of the WORK, RWORK and */
 /* >          IWORK arrays, returns these values as the first entries of */
 /* >          the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, dimension (MAX(1,LRWORK)) */
 /* >          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LRWORK */
 /* > \verbatim */
 /* >          LRWORK is INTEGER */
 /* >          The dimension of the array RWORK. */
 /* >          If N <= 1,                LRWORK >= 1. */
 /* >          If JOBZ  = 'N' and N > 1, LRWORK >= N. */
 /* >          If JOBZ  = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. */
 /* > */
 /* >          If LRWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */
 /* >          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LIWORK */
 /* > \verbatim */
 /* >          LIWORK is INTEGER */
 /* >          The dimension of the array IWORK. */
 /* >          If N <= 1,                LIWORK >= 1. */
 /* >          If JOBZ  = 'N' and N > 1, LIWORK >= 1. */
 /* >          If JOBZ  = 'V' and N > 1, LIWORK >= 3 + 5*N. */
 /* > */
 /* >          If LIWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  CPOTRF or CHEEVD returned an error code: */
 /* >             <= N:  if INFO = i and JOBZ = 'N', then the algorithm */
 /* >                    failed to converge; i off-diagonal elements of an */
 /* >                    intermediate tridiagonal form did not converge to */
 /* >                    zero; */
 /* >                    if INFO = i and JOBZ = 'V', then the algorithm */
 /* >                    failed to compute an eigenvalue while working on */
 /* >                    the submatrix lying in rows and columns INFO/(N+1) */
 /* >                    through mod(INFO,N+1); */
 /* >             > N:   if INFO = N + i, for 1 <= i <= N, then the leading */
 /* >                    minor of order i of B is not positive definite. */
 /* >                    The factorization of B could not be completed and */
 /* >                    no eigenvalues or eigenvectors were computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexHEeigen */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  Modified so that no backsubstitution is performed if CHEEVD fails to */
 /* >  converge (NEIG in old code could be greater than N causing out of */
 /* >  bounds reference to A - reported by Ralf Meyer).  Also corrected the */
 /* >  description of INFO and the test on ITYPE. Sven, 16 Feb 05. */
 /* > \endverbatim */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int chegvd_(integer *itype, char *jobz, char *uplo, integer *
 	n, complex *a, integer *lda, complex *b, integer *ldb, real *w, 
 	complex *work, integer *lwork, real *rwork, integer *lrwork, integer *
 	iwork, integer *liwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
    real r__1, r__2;

    /* Local variables */
    integer lopt;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, 
 	    integer *, integer *, complex *, complex *, integer *, complex *, 
 	    integer *);
    integer lwmin;
    char trans[1];
    integer liopt;
    extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, 
 	    integer *, integer *, complex *, complex *, integer *, complex *, 
 	    integer *);
    logical upper;
    integer lropt;
    logical wantz;
    extern /* Subroutine */ int cheevd_(char *, char *, integer *, complex *, 
 	    integer *, real *, complex *, integer *, real *, integer *, 
 	    integer *, integer *, integer *), chegst_(integer 
 	    *, char *, integer *, complex *, integer *, complex *, integer *, 
 	    integer *), xerbla_(char *, integer *, ftnlen), cpotrf_(
 	    char *, integer *, complex *, integer *, integer *);
    integer liwmin, lrwmin;
    logical lquery;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --w;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1;

    *info = 0;
    if (*n <= 1) {
 	lwmin = 1;
 	lrwmin = 1;
 	liwmin = 1;
    } else if (wantz) {
 	lwmin = (*n << 1) + *n * *n;
 	lrwmin = *n * 5 + 1 + (*n << 1) * *n;
 	liwmin = *n * 5 + 3;
    } else {
 	lwmin = *n + 1;
 	lrwmin = *n;
 	liwmin = 1;
    }
    lopt = lwmin;
    lropt = lrwmin;
    liopt = liwmin;
    if (*itype < 1 || *itype > 3) {
 	*info = -1;
    } else if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -2;
    } else if (! (upper || lsame_(uplo, "L"))) {
 	*info = -3;
    } else if (*n < 0) {
 	*info = -4;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -6;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
    }

    if (*info == 0) {
 	work[1].r = (real) lopt, work[1].i = 0.f;
 	rwork[1] = (real) lropt;
 	iwork[1] = liopt;

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -11;
 	} else if (*lrwork < lrwmin && ! lquery) {
 	    *info = -13;
 	} else if (*liwork < liwmin && ! lquery) {
 	    *info = -15;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHEGVD", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Form a Cholesky factorization of B. */

    cpotrf_(uplo, n, &b[b_offset], ldb, info);
    if (*info != 0) {
 	*info = *n + *info;
 	return 0;
    }

 /*     Transform problem to standard eigenvalue problem and solve. */

    chegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    cheevd_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &rwork[
 	    1], lrwork, &iwork[1], liwork, info);
 /* Computing MAX */
    r__1 = (real) lopt, r__2 = work[1].r;
    lopt = f2cmax(r__1,r__2);
 /* Computing MAX */
    r__1 = (real) lropt;
    lropt = f2cmax(r__1,rwork[1]);
 /* Computing MAX */
    r__1 = (real) liopt, r__2 = (real) iwork[1];
    liopt = f2cmax(r__1,r__2);

    if (wantz && *info == 0) {

 /*        Backtransform eigenvectors to the original problem. */

 	if (*itype == 1 || *itype == 2) {

 /*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */
 /*           backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y */

 	    if (upper) {
 		*(unsigned char *)trans = 'N';
 	    } else {
 		*(unsigned char *)trans = 'C';
 	    }

 	    ctrsm_("Left", uplo, trans, "Non-unit", n, n, &c_b1, &b[b_offset],
 		     ldb, &a[a_offset], lda);

 	} else if (*itype == 3) {

 /*           For B*A*x=(lambda)*x; */
 /*           backtransform eigenvectors: x = L*y or U**H *y */

 	    if (upper) {
 		*(unsigned char *)trans = 'C';
 	    } else {
 		*(unsigned char *)trans = 'N';
 	    }

 	    ctrmm_("Left", uplo, trans, "Non-unit", n, n, &c_b1, &b[b_offset],
 		     ldb, &a[a_offset], lda);
 	}
    }

    work[1].r = (real) lopt, work[1].i = 0.f;
    rwork[1] = (real) lropt;
    iwork[1] = liopt;

    return 0;

 /*     End of CHEGVD */

 } /* chegvd_ */

--- a/lapack-netlib/SRC/chegvx.c
+++ b/lapack-netlib/SRC/chegvx.c
@@ -0,0 +1,894 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static complex c_b1 = {1.f,0.f};
 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b CHEGVX */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHEGVX + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chegvx.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chegvx.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chegvx.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHEGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, */
 /*                          VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, */
 /*                          LWORK, RWORK, IWORK, IFAIL, INFO ) */

 /*       CHARACTER          JOBZ, RANGE, UPLO */
 /*       INTEGER            IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N */
 /*       REAL               ABSTOL, VL, VU */
 /*       INTEGER            IFAIL( * ), IWORK( * ) */
 /*       REAL               RWORK( * ), W( * ) */
 /*       COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * ), */
 /*      $                   Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHEGVX computes selected eigenvalues, and optionally, eigenvectors */
 /* > of a complex generalized Hermitian-definite eigenproblem, of the form */
 /* > A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and */
 /* > B are assumed to be Hermitian and B is also positive definite. */
 /* > Eigenvalues and eigenvectors can be selected by specifying either a */
 /* > range of values or a range of indices for the desired eigenvalues. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] ITYPE */
 /* > \verbatim */
 /* >          ITYPE is INTEGER */
 /* >          Specifies the problem type to be solved: */
 /* >          = 1:  A*x = (lambda)*B*x */
 /* >          = 2:  A*B*x = (lambda)*x */
 /* >          = 3:  B*A*x = (lambda)*x */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] RANGE */
 /* > \verbatim */
 /* >          RANGE is CHARACTER*1 */
 /* >          = 'A': all eigenvalues will be found. */
 /* >          = 'V': all eigenvalues in the half-open interval (VL,VU] */
 /* >                 will be found. */
 /* >          = 'I': the IL-th through IU-th eigenvalues will be found. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangles of A and B are stored; */
 /* >          = 'L':  Lower triangles of A and B are stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA, N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of A contains the */
 /* >          upper triangular part of the matrix A.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of A contains */
 /* >          the lower triangular part of the matrix A. */
 /* > */
 /* >          On exit,  the lower triangle (if UPLO='L') or the upper */
 /* >          triangle (if UPLO='U') of A, including the diagonal, is */
 /* >          destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB, N) */
 /* >          On entry, the Hermitian matrix B.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of B contains the */
 /* >          upper triangular part of the matrix B.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of B contains */
 /* >          the lower triangular part of the matrix B. */
 /* > */
 /* >          On exit, if INFO <= N, the part of B containing the matrix is */
 /* >          overwritten by the triangular factor U or L from the Cholesky */
 /* >          factorization B = U**H*U or B = L*L**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] VL */
 /* > \verbatim */
 /* >          VL is REAL */
 /* > */
 /* >          If RANGE='V', the lower bound of the interval to */
 /* >          be searched for eigenvalues. VL < VU. */
 /* >          Not referenced if RANGE = 'A' or 'I'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] VU */
 /* > \verbatim */
 /* >          VU is REAL */
 /* > */
 /* >          If RANGE='V', the upper bound of the interval to */
 /* >          be searched for eigenvalues. VL < VU. */
 /* >          Not referenced if RANGE = 'A' or 'I'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IL */
 /* > \verbatim */
 /* >          IL is INTEGER */
 /* > */
 /* >          If RANGE='I', the index of the */
 /* >          smallest eigenvalue to be returned. */
 /* >          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */
 /* >          Not referenced if RANGE = 'A' or 'V'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IU */
 /* > \verbatim */
 /* >          IU is INTEGER */
 /* > */
 /* >          If RANGE='I', the index of the */
 /* >          largest eigenvalue to be returned. */
 /* >          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */
 /* >          Not referenced if RANGE = 'A' or 'V'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ABSTOL */
 /* > \verbatim */
 /* >          ABSTOL is REAL */
 /* >          The absolute error tolerance for the eigenvalues. */
 /* >          An approximate eigenvalue is accepted as converged */
 /* >          when it is determined to lie in an interval [a,b] */
 /* >          of width less than or equal to */
 /* > */
 /* >                  ABSTOL + EPS *   f2cmax( |a|,|b| ) , */
 /* > */
 /* >          where EPS is the machine precision.  If ABSTOL is less than */
 /* >          or equal to zero, then  EPS*|T|  will be used in its place, */
 /* >          where |T| is the 1-norm of the tridiagonal matrix obtained */
 /* >          by reducing C to tridiagonal form, where C is the symmetric */
 /* >          matrix of the standard symmetric problem to which the */
 /* >          generalized problem is transformed. */
 /* > */
 /* >          Eigenvalues will be computed most accurately when ABSTOL is */
 /* >          set to twice the underflow threshold 2*SLAMCH('S'), not zero. */
 /* >          If this routine returns with INFO>0, indicating that some */
 /* >          eigenvectors did not converge, try setting ABSTOL to */
 /* >          2*SLAMCH('S'). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The total number of eigenvalues found.  0 <= M <= N. */
 /* >          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is REAL array, dimension (N) */
 /* >          The first M elements contain the selected */
 /* >          eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX array, dimension (LDZ, f2cmax(1,M)) */
 /* >          If JOBZ = 'N', then Z is not referenced. */
 /* >          If JOBZ = 'V', then if INFO = 0, the first M columns of Z */
 /* >          contain the orthonormal eigenvectors of the matrix A */
 /* >          corresponding to the selected eigenvalues, with the i-th */
 /* >          column of Z holding the eigenvector associated with W(i). */
 /* >          The eigenvectors are normalized as follows: */
 /* >          if ITYPE = 1 or 2, Z**T*B*Z = I; */
 /* >          if ITYPE = 3, Z**T*inv(B)*Z = I. */
 /* > */
 /* >          If an eigenvector fails to converge, then that column of Z */
 /* >          contains the latest approximation to the eigenvector, and the */
 /* >          index of the eigenvector is returned in IFAIL. */
 /* >          Note: the user must ensure that at least f2cmax(1,M) columns are */
 /* >          supplied in the array Z; if RANGE = 'V', the exact value of M */
 /* >          is not known in advance and an upper bound must be used. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z.  LDZ >= 1, and if */
 /* >          JOBZ = 'V', LDZ >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK.  LWORK >= f2cmax(1,2*N). */
 /* >          For optimal efficiency, LWORK >= (NB+1)*N, */
 /* >          where NB is the blocksize for CHETRD returned by ILAENV. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, dimension (7*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (5*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IFAIL */
 /* > \verbatim */
 /* >          IFAIL is INTEGER array, dimension (N) */
 /* >          If JOBZ = 'V', then if INFO = 0, the first M elements of */
 /* >          IFAIL are zero.  If INFO > 0, then IFAIL contains the */
 /* >          indices of the eigenvectors that failed to converge. */
 /* >          If JOBZ = 'N', then IFAIL is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  CPOTRF or CHEEVX returned an error code: */
 /* >             <= N:  if INFO = i, CHEEVX failed to converge; */
 /* >                    i eigenvectors failed to converge.  Their indices */
 /* >                    are stored in array IFAIL. */
 /* >             > N:   if INFO = N + i, for 1 <= i <= N, then the leading */
 /* >                    minor of order i of B is not positive definite. */
 /* >                    The factorization of B could not be completed and */
 /* >                    no eigenvalues or eigenvectors were computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2016 */

 /* > \ingroup complexHEeigen */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */

 /*  ===================================================================== */
 /* Subroutine */ int chegvx_(integer *itype, char *jobz, char *range, char *
 	uplo, integer *n, complex *a, integer *lda, complex *b, integer *ldb, 
 	real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *
 	m, real *w, complex *z__, integer *ldz, complex *work, integer *lwork,
 	 real *rwork, integer *iwork, integer *ifail, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, z_dim1, z_offset, i__1, i__2;

    /* Local variables */
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, 
 	    integer *, integer *, complex *, complex *, integer *, complex *, 
 	    integer *);
    char trans[1];
    extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, 
 	    integer *, integer *, complex *, complex *, integer *, complex *, 
 	    integer *);
    logical upper, wantz;
    integer nb;
    logical alleig, indeig, valeig;
    extern /* Subroutine */ int chegst_(integer *, char *, integer *, complex 
 	    *, integer *, complex *, integer *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), cheevx_(
 	    char *, char *, char *, integer *, complex *, integer *, real *, 
 	    real *, integer *, integer *, real *, integer *, real *, complex *
 	    , integer *, complex *, integer *, real *, integer *, integer *, 
 	    integer *), cpotrf_(char *, integer *, 
 	    complex *, integer *, integer *);
    integer lwkopt;
    logical lquery;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;
    --iwork;
    --ifail;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    alleig = lsame_(range, "A");
    valeig = lsame_(range, "V");
    indeig = lsame_(range, "I");
    lquery = *lwork == -1;

    *info = 0;
    if (*itype < 1 || *itype > 3) {
 	*info = -1;
    } else if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -2;
    } else if (! (alleig || valeig || indeig)) {
 	*info = -3;
    } else if (! (upper || lsame_(uplo, "L"))) {
 	*info = -4;
    } else if (*n < 0) {
 	*info = -5;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -7;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -9;
    } else {
 	if (valeig) {
 	    if (*n > 0 && *vu <= *vl) {
 		*info = -11;
 	    }
 	} else if (indeig) {
 	    if (*il < 1 || *il > f2cmax(1,*n)) {
 		*info = -12;
 	    } else if (*iu < f2cmin(*n,*il) || *iu > *n) {
 		*info = -13;
 	    }
 	}
    }
    if (*info == 0) {
 	if (*ldz < 1 || wantz && *ldz < *n) {
 	    *info = -18;
 	}
    }

    if (*info == 0) {
 	nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
 		 (ftnlen)1);
 /* Computing MAX */
 	i__1 = 1, i__2 = (nb + 1) * *n;
 	lwkopt = f2cmax(i__1,i__2);
 	work[1].r = (real) lwkopt, work[1].i = 0.f;

 /* Computing MAX */
 	i__1 = 1, i__2 = *n << 1;
 	if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
 	    *info = -20;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHEGVX", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    *m = 0;
    if (*n == 0) {
 	return 0;
    }

 /*     Form a Cholesky factorization of B. */

    cpotrf_(uplo, n, &b[b_offset], ldb, info);
    if (*info != 0) {
 	*info = *n + *info;
 	return 0;
    }

 /*     Transform problem to standard eigenvalue problem and solve. */

    chegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    cheevx_(jobz, range, uplo, n, &a[a_offset], lda, vl, vu, il, iu, abstol, 
 	    m, &w[1], &z__[z_offset], ldz, &work[1], lwork, &rwork[1], &iwork[
 	    1], &ifail[1], info);

    if (wantz) {

 /*        Backtransform eigenvectors to the original problem. */

 	if (*info > 0) {
 	    *m = *info - 1;
 	}
 	if (*itype == 1 || *itype == 2) {

 /*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */
 /*           backtransform eigenvectors: x = inv(L)**H*y or inv(U)*y */

 	    if (upper) {
 		*(unsigned char *)trans = 'N';
 	    } else {
 		*(unsigned char *)trans = 'C';
 	    }

 	    ctrsm_("Left", uplo, trans, "Non-unit", n, m, &c_b1, &b[b_offset],
 		     ldb, &z__[z_offset], ldz);

 	} else if (*itype == 3) {

 /*           For B*A*x=(lambda)*x; */
 /*           backtransform eigenvectors: x = L*y or U**H*y */

 	    if (upper) {
 		*(unsigned char *)trans = 'C';
 	    } else {
 		*(unsigned char *)trans = 'N';
 	    }

 	    ctrmm_("Left", uplo, trans, "Non-unit", n, m, &c_b1, &b[b_offset],
 		     ldb, &z__[z_offset], ldz);
 	}
    }

 /*     Set WORK(1) to optimal complex workspace size. */

    work[1].r = (real) lwkopt, work[1].i = 0.f;

    return 0;

 /*     End of CHEGVX */

 } /* chegvx_ */

--- a/lapack-netlib/SRC/cherfs.c
+++ b/lapack-netlib/SRC/cherfs.c
@@ -0,0 +1,924 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static complex c_b1 = {1.f,0.f};
 static integer c__1 = 1;

 /* > \brief \b CHERFS */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHERFS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cherfs.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cherfs.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cherfs.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, */
 /*                          X, LDX, FERR, BERR, WORK, RWORK, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       REAL               BERR( * ), FERR( * ), RWORK( * ) */
 /*       COMPLEX            A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */
 /*      $                   WORK( * ), X( LDX, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHERFS improves the computed solution to a system of linear */
 /* > equations when the coefficient matrix is Hermitian indefinite, and */
 /* > provides error bounds and backward error estimates for the solution. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrices B and X.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          The Hermitian matrix A.  If UPLO = 'U', the leading N-by-N */
 /* >          upper triangular part of A contains the upper triangular part */
 /* >          of the matrix A, and the strictly lower triangular part of A */
 /* >          is not referenced.  If UPLO = 'L', the leading N-by-N lower */
 /* >          triangular part of A contains the lower triangular part of */
 /* >          the matrix A, and the strictly upper triangular part of A is */
 /* >          not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AF */
 /* > \verbatim */
 /* >          AF is COMPLEX array, dimension (LDAF,N) */
 /* >          The factored form of the matrix A.  AF contains the block */
 /* >          diagonal matrix D and the multipliers used to obtain the */
 /* >          factor U or L from the factorization A = U*D*U**H or */
 /* >          A = L*D*L**H as computed by CHETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAF */
 /* > \verbatim */
 /* >          LDAF is INTEGER */
 /* >          The leading dimension of the array AF.  LDAF >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          Details of the interchanges and the block structure of D */
 /* >          as determined by CHETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,NRHS) */
 /* >          The right hand side matrix B. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] X */
 /* > \verbatim */
 /* >          X is COMPLEX array, dimension (LDX,NRHS) */
 /* >          On entry, the solution matrix X, as computed by CHETRS. */
 /* >          On exit, the improved solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDX */
 /* > \verbatim */
 /* >          LDX is INTEGER */
 /* >          The leading dimension of the array X.  LDX >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] FERR */
 /* > \verbatim */
 /* >          FERR is REAL array, dimension (NRHS) */
 /* >          The estimated forward error bound for each solution vector */
 /* >          X(j) (the j-th column of the solution matrix X). */
 /* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
 /* >          is an estimated upper bound for the magnitude of the largest */
 /* >          element in (X(j) - XTRUE) divided by the magnitude of the */
 /* >          largest element in X(j).  The estimate is as reliable as */
 /* >          the estimate for RCOND, and is almost always a slight */
 /* >          overestimate of the true error. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] BERR */
 /* > \verbatim */
 /* >          BERR is REAL array, dimension (NRHS) */
 /* >          The componentwise relative backward error of each solution */
 /* >          vector X(j) (i.e., the smallest relative change in */
 /* >          any element of A or B that makes X(j) an exact solution). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /* > \par Internal Parameters: */
 /*  ========================= */
 /* > */
 /* > \verbatim */
 /* >  ITMAX is the maximum number of steps of iterative refinement. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexHEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int cherfs_(char *uplo, integer *n, integer *nrhs, complex *
 	a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex *
 	b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, 
 	complex *work, real *rwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
 	    x_offset, i__1, i__2, i__3, i__4, i__5;
    real r__1, r__2, r__3, r__4;
    complex q__1;

    /* Local variables */
    integer kase;
    real safe1, safe2;
    integer i__, j, k;
    real s;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int chemv_(char *, integer *, complex *, complex *
 	    , integer *, complex *, integer *, complex *, complex *, integer *
 	    );
    integer isave[3];
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
 	    complex *, integer *), caxpy_(integer *, complex *, complex *, 
 	    integer *, complex *, integer *);
    integer count;
    logical upper;
    extern /* Subroutine */ int clacn2_(integer *, complex *, complex *, real 
 	    *, integer *, integer *);
    real xk;
    extern real slamch_(char *);
    integer nz;
    real safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), chetrs_(
 	    char *, integer *, integer *, complex *, integer *, integer *, 
 	    complex *, integer *, integer *);
    real lstres, eps;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    af_dim1 = *ldaf;
    af_offset = 1 + af_dim1 * 1;
    af -= af_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --rwork;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldaf < f2cmax(1,*n)) {
 	*info = -7;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -10;
    } else if (*ldx < f2cmax(1,*n)) {
 	*info = -12;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHERFS", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
 	i__1 = *nrhs;
 	for (j = 1; j <= i__1; ++j) {
 	    ferr[j] = 0.f;
 	    berr[j] = 0.f;
 /* L10: */
 	}
 	return 0;
    }

 /*     NZ = maximum number of nonzero elements in each row of A, plus 1 */

    nz = *n + 1;
    eps = slamch_("Epsilon");
    safmin = slamch_("Safe minimum");
    safe1 = nz * safmin;
    safe2 = safe1 / eps;

 /*     Do for each right hand side */

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {

 	count = 1;
 	lstres = 3.f;
 L20:

 /*        Loop until stopping criterion is satisfied. */

 /*        Compute residual R = B - A * X */

 	ccopy_(n, &b[j * b_dim1 + 1], &c__1, &work[1], &c__1);
 	q__1.r = -1.f, q__1.i = 0.f;
 	chemv_(uplo, n, &q__1, &a[a_offset], lda, &x[j * x_dim1 + 1], &c__1, &
 		c_b1, &work[1], &c__1);

 /*        Compute componentwise relative backward error from formula */

 /*        f2cmax(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */

 /*        where abs(Z) is the componentwise absolute value of the matrix */
 /*        or vector Z.  If the i-th component of the denominator is less */
 /*        than SAFE2, then SAFE1 is added to the i-th components of the */
 /*        numerator and denominator before dividing. */

 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    i__3 = i__ + j * b_dim1;
 	    rwork[i__] = (r__1 = b[i__3].r, abs(r__1)) + (r__2 = r_imag(&b[
 		    i__ + j * b_dim1]), abs(r__2));
 /* L30: */
 	}

 /*        Compute abs(A)*abs(X) + abs(B). */

 	if (upper) {
 	    i__2 = *n;
 	    for (k = 1; k <= i__2; ++k) {
 		s = 0.f;
 		i__3 = k + j * x_dim1;
 		xk = (r__1 = x[i__3].r, abs(r__1)) + (r__2 = r_imag(&x[k + j *
 			 x_dim1]), abs(r__2));
 		i__3 = k - 1;
 		for (i__ = 1; i__ <= i__3; ++i__) {
 		    i__4 = i__ + k * a_dim1;
 		    rwork[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = 
 			    r_imag(&a[i__ + k * a_dim1]), abs(r__2))) * xk;
 		    i__4 = i__ + k * a_dim1;
 		    i__5 = i__ + j * x_dim1;
 		    s += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[
 			    i__ + k * a_dim1]), abs(r__2))) * ((r__3 = x[i__5]
 			    .r, abs(r__3)) + (r__4 = r_imag(&x[i__ + j * 
 			    x_dim1]), abs(r__4)));
 /* L40: */
 		}
 		i__3 = k + k * a_dim1;
 		rwork[k] = rwork[k] + (r__1 = a[i__3].r, abs(r__1)) * xk + s;
 /* L50: */
 	    }
 	} else {
 	    i__2 = *n;
 	    for (k = 1; k <= i__2; ++k) {
 		s = 0.f;
 		i__3 = k + j * x_dim1;
 		xk = (r__1 = x[i__3].r, abs(r__1)) + (r__2 = r_imag(&x[k + j *
 			 x_dim1]), abs(r__2));
 		i__3 = k + k * a_dim1;
 		rwork[k] += (r__1 = a[i__3].r, abs(r__1)) * xk;
 		i__3 = *n;
 		for (i__ = k + 1; i__ <= i__3; ++i__) {
 		    i__4 = i__ + k * a_dim1;
 		    rwork[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = 
 			    r_imag(&a[i__ + k * a_dim1]), abs(r__2))) * xk;
 		    i__4 = i__ + k * a_dim1;
 		    i__5 = i__ + j * x_dim1;
 		    s += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[
 			    i__ + k * a_dim1]), abs(r__2))) * ((r__3 = x[i__5]
 			    .r, abs(r__3)) + (r__4 = r_imag(&x[i__ + j * 
 			    x_dim1]), abs(r__4)));
 /* L60: */
 		}
 		rwork[k] += s;
 /* L70: */
 	    }
 	}
 	s = 0.f;
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    if (rwork[i__] > safe2) {
 /* Computing MAX */
 		i__3 = i__;
 		r__3 = s, r__4 = ((r__1 = work[i__3].r, abs(r__1)) + (r__2 = 
 			r_imag(&work[i__]), abs(r__2))) / rwork[i__];
 		s = f2cmax(r__3,r__4);
 	    } else {
 /* Computing MAX */
 		i__3 = i__;
 		r__3 = s, r__4 = ((r__1 = work[i__3].r, abs(r__1)) + (r__2 = 
 			r_imag(&work[i__]), abs(r__2)) + safe1) / (rwork[i__] 
 			+ safe1);
 		s = f2cmax(r__3,r__4);
 	    }
 /* L80: */
 	}
 	berr[j] = s;

 /*        Test stopping criterion. Continue iterating if */
 /*           1) The residual BERR(J) is larger than machine epsilon, and */
 /*           2) BERR(J) decreased by at least a factor of 2 during the */
 /*              last iteration, and */
 /*           3) At most ITMAX iterations tried. */

 	if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) {

 /*           Update solution and try again. */

 	    chetrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[1], 
 		    n, info);
 	    caxpy_(n, &c_b1, &work[1], &c__1, &x[j * x_dim1 + 1], &c__1);
 	    lstres = berr[j];
 	    ++count;
 	    goto L20;
 	}

 /*        Bound error from formula */

 /*        norm(X - XTRUE) / norm(X) .le. FERR = */
 /*        norm( abs(inv(A))* */
 /*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */

 /*        where */
 /*          norm(Z) is the magnitude of the largest component of Z */
 /*          inv(A) is the inverse of A */
 /*          abs(Z) is the componentwise absolute value of the matrix or */
 /*             vector Z */
 /*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
 /*          EPS is machine epsilon */

 /*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */
 /*        is incremented by SAFE1 if the i-th component of */
 /*        abs(A)*abs(X) + abs(B) is less than SAFE2. */

 /*        Use CLACN2 to estimate the infinity-norm of the matrix */
 /*           inv(A) * diag(W), */
 /*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */

 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    if (rwork[i__] > safe2) {
 		i__3 = i__;
 		rwork[i__] = (r__1 = work[i__3].r, abs(r__1)) + (r__2 = 
 			r_imag(&work[i__]), abs(r__2)) + nz * eps * rwork[i__]
 			;
 	    } else {
 		i__3 = i__;
 		rwork[i__] = (r__1 = work[i__3].r, abs(r__1)) + (r__2 = 
 			r_imag(&work[i__]), abs(r__2)) + nz * eps * rwork[i__]
 			 + safe1;
 	    }
 /* L90: */
 	}

 	kase = 0;
 L100:
 	clacn2_(n, &work[*n + 1], &work[1], &ferr[j], &kase, isave);
 	if (kase != 0) {
 	    if (kase == 1) {

 /*              Multiply by diag(W)*inv(A**H). */

 		chetrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[
 			1], n, info);
 		i__2 = *n;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    i__3 = i__;
 		    i__4 = i__;
 		    i__5 = i__;
 		    q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] 
 			    * work[i__5].i;
 		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
 /* L110: */
 		}
 	    } else if (kase == 2) {

 /*              Multiply by inv(A)*diag(W). */

 		i__2 = *n;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    i__3 = i__;
 		    i__4 = i__;
 		    i__5 = i__;
 		    q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] 
 			    * work[i__5].i;
 		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
 /* L120: */
 		}
 		chetrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[
 			1], n, info);
 	    }
 	    goto L100;
 	}

 /*        Normalize error. */

 	lstres = 0.f;
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 /* Computing MAX */
 	    i__3 = i__ + j * x_dim1;
 	    r__3 = lstres, r__4 = (r__1 = x[i__3].r, abs(r__1)) + (r__2 = 
 		    r_imag(&x[i__ + j * x_dim1]), abs(r__2));
 	    lstres = f2cmax(r__3,r__4);
 /* L130: */
 	}
 	if (lstres != 0.f) {
 	    ferr[j] /= lstres;
 	}

 /* L140: */
    }

    return 0;

 /*     End of CHERFS */

 } /* cherfs_ */

--- a/lapack-netlib/SRC/cherfsx.c
+++ b/lapack-netlib/SRC/cherfsx.c
@@ -0,0 +1,381 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
--- a/lapack-netlib/SRC/chesv.c
+++ b/lapack-netlib/SRC/chesv.c
@@ -0,0 +1,676 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief <b> CHESV computes the solution to system of linear equations A * X = B for HE matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHESV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chesv.f
 "> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chesv.f
 "> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chesv.f
 "> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHESV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, */
 /*                         LWORK, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, LDB, LWORK, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHESV computes the solution to a complex system of linear equations */
 /* >    A * X = B, */
 /* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */
 /* > matrices. */
 /* > */
 /* > The diagonal pivoting method is used to factor A as */
 /* >    A = U * D * U**H,  if UPLO = 'U', or */
 /* >    A = L * D * L**H,  if UPLO = 'L', */
 /* > where U (or L) is a product of permutation and unit upper (lower) */
 /* > triangular matrices, and D is Hermitian and block diagonal with */
 /* > 1-by-1 and 2-by-2 diagonal blocks.  The factored form of A is then */
 /* > used to solve the system of equations A * X = B. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of linear equations, i.e., the order of the */
 /* >          matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          N-by-N upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading N-by-N lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* > */
 /* >          On exit, if INFO = 0, the block diagonal matrix D and the */
 /* >          multipliers used to obtain the factor U or L from the */
 /* >          factorization A = U*D*U**H or A = L*D*L**H as computed by */
 /* >          CHETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          Details of the interchanges and the block structure of D, as */
 /* >          determined by CHETRF.  If IPIV(k) > 0, then rows and columns */
 /* >          k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 */
 /* >          diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, */
 /* >          then rows and columns k-1 and -IPIV(k) were interchanged and */
 /* >          D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and */
 /* >          IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and */
 /* >          -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 */
 /* >          diagonal block. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,NRHS) */
 /* >          On entry, the N-by-NRHS right hand side matrix B. */
 /* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of WORK.  LWORK >= 1, and for best performance */
 /* >          LWORK >= f2cmax(1,N*NB), where NB is the optimal blocksize for */
 /* >          CHETRF. */
 /* >          for LWORK < N, TRS will be done with Level BLAS 2 */
 /* >          for LWORK >= N, TRS will be done with Level BLAS 3 */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization */
 /* >               has been completed, but the block diagonal matrix D is */
 /* >               exactly singular, so the solution could not be computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexHEsolve */

 /*  ===================================================================== */
 /* Subroutine */ int chesv_(char *uplo, integer *n, integer *nrhs, complex *a,
 	 integer *lda, integer *ipiv, complex *b, integer *ldb, complex *work,
 	 integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;

    /* Local variables */
    extern logical lsame_(char *, char *);
    integer nb;
    extern /* Subroutine */ int chetrf_(char *, integer *, complex *, integer 
 	    *, integer *, complex *, integer *, integer *), xerbla_(
 	    char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int chetrs_(char *, integer *, integer *, complex 
 	    *, integer *, integer *, complex *, integer *, integer *);
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int chetrs2_(char *, integer *, integer *, 
 	    complex *, integer *, integer *, complex *, integer *, complex *, 
 	    integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;

    /* Function Body */
    *info = 0;
    lquery = *lwork == -1;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
    } else if (*lwork < 1 && ! lquery) {
 	*info = -10;
    }

    if (*info == 0) {
 	if (*n == 0) {
 	    lwkopt = 1;
 	} else {
 	    nb = ilaenv_(&c__1, "CHETRF", uplo, n, &c_n1, &c_n1, &c_n1, (
 		    ftnlen)6, (ftnlen)1);
 	    lwkopt = *n * nb;
 	}
 	work[1].r = (real) lwkopt, work[1].i = 0.f;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHESV ", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Compute the factorization A = U*D*U**H or A = L*D*L**H. */

    chetrf_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info);
    if (*info == 0) {

 /*        Solve the system A*X = B, overwriting B with X. */

 	if (*lwork < *n) {

 /*        Solve with TRS ( Use Level BLAS 2) */

 	    chetrs_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], 
 		    ldb, info);

 	} else {

 /*        Solve with TRS2 ( Use Level BLAS 3) */

 	    chetrs2_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset],
 		     ldb, &work[1], info);

 	}

    }

    work[1].r = (real) lwkopt, work[1].i = 0.f;

    return 0;

 /*     End of CHESV */

 } /* chesv_ */

--- a/lapack-netlib/SRC/chesv_aa.c
+++ b/lapack-netlib/SRC/chesv_aa.c
@@ -0,0 +1,652 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c_n1 = -1;

 /* > \brief <b> CHESV_AA computes the solution to system of linear equations A * X = B for HE matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHESV_AA + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chesv_a
 a.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chesv_a
 a.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chesv_a
 a.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHESV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, */
 /*                            LWORK, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, LDB, LWORK, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHESV_AA computes the solution to a complex system of linear equations */
 /* >    A * X = B, */
 /* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */
 /* > matrices. */
 /* > */
 /* > Aasen's algorithm is used to factor A as */
 /* >    A = U**H * T * U,  if UPLO = 'U', or */
 /* >    A = L * T * L**H,  if UPLO = 'L', */
 /* > where U (or L) is a product of permutation and unit upper (lower) */
 /* > triangular matrices, and T is Hermitian and tridiagonal. The factored form */
 /* > of A is then used to solve the system of equations A * X = B. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of linear equations, i.e., the order of the */
 /* >          matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          N-by-N upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading N-by-N lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* > */
 /* >          On exit, if INFO = 0, the tridiagonal matrix T and the */
 /* >          multipliers used to obtain the factor U or L from the */
 /* >          factorization A = U**H*T*U or A = L*T*L**H as computed by */
 /* >          CHETRF_AA. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          On exit, it contains the details of the interchanges, i.e., */
 /* >          the row and column k of A were interchanged with the */
 /* >          row and column IPIV(k). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,NRHS) */
 /* >          On entry, the N-by-NRHS right hand side matrix B. */
 /* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of WORK.  LWORK >= MAX(1,2*N,3*N-2), and for best */
 /* >          performance LWORK >= MAX(1,N*NB), where NB is the optimal */
 /* >          blocksize for CHETRF. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization */
 /* >               has been completed, but the block diagonal matrix D is */
 /* >               exactly singular, so the solution could not be computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complexHEsolve */

 /*  ===================================================================== */
 /* Subroutine */ int chesv_aa_(char *uplo, integer *n, integer *nrhs, 
 	complex *a, integer *lda, integer *ipiv, complex *b, integer *ldb, 
 	complex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;

    /* Local variables */
    integer lwkopt_hetrf__, lwkopt_hetrs__;
    extern /* Subroutine */ int chetrf_aa_(char *, integer *, complex *, 
 	    integer *, integer *, complex *, integer *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int chetrs_aa_(char *, integer *, integer *, 
 	    complex *, integer *, integer *, complex *, integer *, complex *, 
 	    integer *, integer *), xerbla_(char *, integer *, ftnlen);
    integer lwkopt;
    logical lquery;


 /*  -- LAPACK driver routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;

    /* Function Body */
    *info = 0;
    lquery = *lwork == -1;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = *n << 1, i__2 = *n * 3 - 2;
 	if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
 	    *info = -10;
 	}
    }

    if (*info == 0) {
 	chetrf_aa_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], &c_n1, 
 		info);
 	lwkopt_hetrf__ = (integer) work[1].r;
 	chetrs_aa_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], 
 		ldb, &work[1], &c_n1, info);
 	lwkopt_hetrs__ = (integer) work[1].r;
 	lwkopt = f2cmax(lwkopt_hetrf__,lwkopt_hetrs__);
 	work[1].r = (real) lwkopt, work[1].i = 0.f;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHESV_AA ", &i__1, (ftnlen)9);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Compute the factorization A = U**H*T*U or A = L*T*L**H. */

    chetrf_aa_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info);
    if (*info == 0) {

 /*        Solve the system A*X = B, overwriting B with X. */

 	chetrs_aa_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], 
 		ldb, &work[1], lwork, info);

    }

    work[1].r = (real) lwkopt, work[1].i = 0.f;

    return 0;

 /*     End of CHESV_AA */

 } /* chesv_aa__ */

--- a/lapack-netlib/SRC/chesv_aa_2stage.c
+++ b/lapack-netlib/SRC/chesv_aa_2stage.c
@@ -0,0 +1,679 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c_n1 = -1;

 /* > \brief <b> CHESV_AA_2STAGE computes the solution to system of linear equations A * X = B for HE matrices
 </b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHESV_AA_2STAGE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chesv_a
 a_2stage.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chesv_a
 a_2stage.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chesv_a
 a_2stage.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*      SUBROUTINE CHESV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, */
 /*                                  IPIV, IPIV2, B, LDB, WORK, LWORK, */
 /*                                  INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            N, NRHS, LDA, LTB, LDB, LWORK, INFO */
 /*       INTEGER            IPIV( * ), IPIV2( * ) */
 /*       COMPLEX            A( LDA, * ), TB( * ), B( LDB, *), WORK( * ) */

 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHESV_AA_2STAGE computes the solution to a complex system of */
 /* > linear equations */
 /* >    A * X = B, */
 /* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */
 /* > matrices. */
 /* > */
 /* > Aasen's 2-stage algorithm is used to factor A as */
 /* >    A = U**H * T * U,  if UPLO = 'U', or */
 /* >    A = L * T * L**H,  if UPLO = 'L', */
 /* > where U (or L) is a product of permutation and unit upper (lower) */
 /* > triangular matrices, and T is Hermitian and band. The matrix T is */
 /* > then LU-factored with partial pivoting. The factored form of A */
 /* > is then used to solve the system of equations A * X = B. */
 /* > */
 /* > This is the blocked version of the algorithm, calling Level 3 BLAS. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          On entry, the hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          N-by-N upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading N-by-N lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* > */
 /* >          On exit, L is stored below (or above) the subdiaonal blocks, */
 /* >          when UPLO  is 'L' (or 'U'). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TB */
 /* > \verbatim */
 /* >          TB is COMPLEX array, dimension (LTB) */
 /* >          On exit, details of the LU factorization of the band matrix. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LTB */
 /* > \verbatim */
 /* >          LTB is INTEGER */
 /* >          The size of the array TB. LTB >= 4*N, internally */
 /* >          used to select NB such that LTB >= (3*NB+1)*N. */
 /* > */
 /* >          If LTB = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal size of LTB, */
 /* >          returns this value as the first entry of TB, and */
 /* >          no error message related to LTB is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          On exit, it contains the details of the interchanges, i.e., */
 /* >          the row and column k of A were interchanged with the */
 /* >          row and column IPIV(k). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV2 */
 /* > \verbatim */
 /* >          IPIV2 is INTEGER array, dimension (N) */
 /* >          On exit, it contains the details of the interchanges, i.e., */
 /* >          the row and column k of T were interchanged with the */
 /* >          row and column IPIV(k). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,NRHS) */
 /* >          On entry, the right hand side matrix B. */
 /* >          On exit, the solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX workspace of size LWORK */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The size of WORK. LWORK >= N, internally used to select NB */
 /* >          such that LWORK >= N*NB. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal size of the WORK array, */
 /* >          returns this value as the first entry of the WORK array, and */
 /* >          no error message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          > 0:  if INFO = i, band LU factorization failed on i-th column */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complexSYcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int chesv_aa_2stage_(char *uplo, integer *n, integer *nrhs, 
 	complex *a, integer *lda, complex *tb, integer *ltb, integer *ipiv, 
 	integer *ipiv2, complex *b, integer *ldb, complex *work, integer *
 	lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;

    /* Local variables */
    extern /* Subroutine */ int chetrs_aa_2stage_(char *, integer *, integer 
 	    *, complex *, integer *, complex *, integer *, integer *, integer 
 	    *, complex *, integer *, integer *);
    extern logical lsame_(char *, char *);
    logical upper;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    integer lwkopt;
    logical tquery, wquery;
    extern /* Subroutine */ int chetrf_aa_2stage_(char *, integer *, complex 
 	    *, integer *, complex *, integer *, integer *, integer *, complex 
 	    *, integer *, integer *);


 /*  -- LAPACK computational routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */



 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tb;
    --ipiv;
    --ipiv2;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    wquery = *lwork == -1;
    tquery = *ltb == -1;
    if (! upper && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ltb < *n << 2 && ! tquery) {
 	*info = -7;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -11;
    } else if (*lwork < *n && ! wquery) {
 	*info = -13;
    }

    if (*info == 0) {
 	chetrf_aa_2stage_(uplo, n, &a[a_offset], lda, &tb[1], &c_n1, &ipiv[1]
 		, &ipiv2[1], &work[1], &c_n1, info);
 	lwkopt = (integer) work[1].r;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHESV_AA_2STAGE", &i__1, (ftnlen)15);
 	return 0;
    } else if (wquery || tquery) {
 	return 0;
    }


 /*     Compute the factorization A = U**H*T*U or A = L*T*L**H. */

    chetrf_aa_2stage_(uplo, n, &a[a_offset], lda, &tb[1], ltb, &ipiv[1], &
 	    ipiv2[1], &work[1], lwork, info);
    if (*info == 0) {

 /*        Solve the system A*X = B, overwriting B with X. */

 	chetrs_aa_2stage_(uplo, n, nrhs, &a[a_offset], lda, &tb[1], ltb, &
 		ipiv[1], &ipiv2[1], &b[b_offset], ldb, info);

    }

    work[1].r = (real) lwkopt, work[1].i = 0.f;

    return 0;

 /*     End of CHESV_AA_2STAGE */

 } /* chesv_aa_2stage__ */

--- a/lapack-netlib/SRC/chesv_rk.c
+++ b/lapack-netlib/SRC/chesv_rk.c
@@ -0,0 +1,716 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c_n1 = -1;

 /* > \brief <b> CHESV_RK computes the solution to system of linear equations A * X = B for SY matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHESV_RK + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chesv_r
 k.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chesv_r
 k.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chesv_r
 k.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHESV_RK( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, */
 /*                            WORK, LWORK, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, LDB, LWORK, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX            A( LDA, * ), B( LDB, * ), E( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > CHESV_RK computes the solution to a complex system of linear */
 /* > equations A * X = B, where A is an N-by-N Hermitian matrix */
 /* > and X and B are N-by-NRHS matrices. */
 /* > */
 /* > The bounded Bunch-Kaufman (rook) diagonal pivoting method is used */
 /* > to factor A as */
 /* >    A = P*U*D*(U**H)*(P**T),  if UPLO = 'U', or */
 /* >    A = P*L*D*(L**H)*(P**T),  if UPLO = 'L', */
 /* > where U (or L) is unit upper (or lower) triangular matrix, */
 /* > U**H (or L**H) is the conjugate of U (or L), P is a permutation */
 /* > matrix, P**T is the transpose of P, and D is Hermitian and block */
 /* > diagonal with 1-by-1 and 2-by-2 diagonal blocks. */
 /* > */
 /* > CHETRF_RK is called to compute the factorization of a complex */
 /* > Hermitian matrix.  The factored form of A is then used to solve */
 /* > the system of equations A * X = B by calling BLAS3 routine CHETRS_3. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          Specifies whether the upper or lower triangular part of the */
 /* >          Hermitian matrix A is stored: */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of linear equations, i.e., the order of the */
 /* >          matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A. */
 /* >            If UPLO = 'U': the leading N-by-N upper triangular part */
 /* >            of A contains the upper triangular part of the matrix A, */
 /* >            and the strictly lower triangular part of A is not */
 /* >            referenced. */
 /* > */
 /* >            If UPLO = 'L': the leading N-by-N lower triangular part */
 /* >            of A contains the lower triangular part of the matrix A, */
 /* >            and the strictly upper triangular part of A is not */
 /* >            referenced. */
 /* > */
 /* >          On exit, if INFO = 0, diagonal of the block diagonal */
 /* >          matrix D and factors U or L  as computed by CHETRF_RK: */
 /* >            a) ONLY diagonal elements of the Hermitian block diagonal */
 /* >               matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); */
 /* >               (superdiagonal (or subdiagonal) elements of D */
 /* >                are stored on exit in array E), and */
 /* >            b) If UPLO = 'U': factor U in the superdiagonal part of A. */
 /* >               If UPLO = 'L': factor L in the subdiagonal part of A. */
 /* > */
 /* >          For more info see the description of CHETRF_RK routine. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] E */
 /* > \verbatim */
 /* >          E is COMPLEX array, dimension (N) */
 /* >          On exit, contains the output computed by the factorization */
 /* >          routine CHETRF_RK, i.e. the superdiagonal (or subdiagonal) */
 /* >          elements of the Hermitian block diagonal matrix D */
 /* >          with 1-by-1 or 2-by-2 diagonal blocks, where */
 /* >          If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; */
 /* >          If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. */
 /* > */
 /* >          NOTE: For 1-by-1 diagonal block D(k), where */
 /* >          1 <= k <= N, the element E(k) is set to 0 in both */
 /* >          UPLO = 'U' or UPLO = 'L' cases. */
 /* > */
 /* >          For more info see the description of CHETRF_RK routine. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          Details of the interchanges and the block structure of D, */
 /* >          as determined by CHETRF_RK. */
 /* > */
 /* >          For more info see the description of CHETRF_RK routine. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,NRHS) */
 /* >          On entry, the N-by-NRHS right hand side matrix B. */
 /* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension ( MAX(1,LWORK) ). */
 /* >          Work array used in the factorization stage. */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of WORK.  LWORK >= 1. For best performance */
 /* >          of factorization stage LWORK >= f2cmax(1,N*NB), where NB is */
 /* >          the optimal blocksize for CHETRF_RK. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; */
 /* >          the routine only calculates the optimal size of the WORK */
 /* >          array for factorization stage, returns this value as */
 /* >          the first entry of the WORK array, and no error message */
 /* >          related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* > */
 /* >          < 0: If INFO = -k, the k-th argument had an illegal value */
 /* > */
 /* >          > 0: If INFO = k, the matrix A is singular, because: */
 /* >                 If UPLO = 'U': column k in the upper */
 /* >                 triangular part of A contains all zeros. */
 /* >                 If UPLO = 'L': column k in the lower */
 /* >                 triangular part of A contains all zeros. */
 /* > */
 /* >               Therefore D(k,k) is exactly zero, and superdiagonal */
 /* >               elements of column k of U (or subdiagonal elements of */
 /* >               column k of L ) are all zeros. The factorization has */
 /* >               been completed, but the block diagonal matrix D is */
 /* >               exactly singular, and division by zero will occur if */
 /* >               it is used to solve a system of equations. */
 /* > */
 /* >               NOTE: INFO only stores the first occurrence of */
 /* >               a singularity, any subsequent occurrence of singularity */
 /* >               is not stored in INFO even though the factorization */
 /* >               always completes. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexHEsolve */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  December 2016,  Igor Kozachenko, */
 /* >                  Computer Science Division, */
 /* >                  University of California, Berkeley */
 /* > */
 /* >  September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */
 /* >                  School of Mathematics, */
 /* >                  University of Manchester */
 /* > */
 /* > \endverbatim */

 /*  ===================================================================== */
 /* Subroutine */ int chesv_rk_(char *uplo, integer *n, integer *nrhs, 
 	complex *a, integer *lda, complex *e, integer *ipiv, complex *b, 
 	integer *ldb, complex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;

    /* Local variables */
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int chetrf_rk_(char *, integer *, complex *, 
 	    integer *, complex *, integer *, complex *, integer *, integer *), xerbla_(char *, integer *, ftnlen);
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int chetrs_3_(char *, integer *, integer *, 
 	    complex *, integer *, complex *, integer *, complex *, integer *, 
 	    integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --e;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;

    /* Function Body */
    *info = 0;
    lquery = *lwork == -1;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -9;
    } else if (*lwork < 1 && ! lquery) {
 	*info = -11;
    }

    if (*info == 0) {
 	if (*n == 0) {
 	    lwkopt = 1;
 	} else {
 	    chetrf_rk_(uplo, n, &a[a_offset], lda, &e[1], &ipiv[1], &work[1],
 		     &c_n1, info);
 	    lwkopt = work[1].r;
 	}
 	work[1].r = (real) lwkopt, work[1].i = 0.f;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHESV_RK ", &i__1,(ftnlen)9);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Compute the factorization A = U*D*U**T or A = L*D*L**T. */

    chetrf_rk_(uplo, n, &a[a_offset], lda, &e[1], &ipiv[1], &work[1], lwork, 
 	    info);

    if (*info == 0) {

 /*        Solve the system A*X = B with BLAS3 solver, overwriting B with X. */

 	chetrs_3_(uplo, n, nrhs, &a[a_offset], lda, &e[1], &ipiv[1], &b[
 		b_offset], ldb, info);

    }

    work[1].r = (real) lwkopt, work[1].i = 0.f;

    return 0;

 /*     End of CHESV_RK */

 } /* chesv_rk__ */

--- a/lapack-netlib/SRC/chesv_rook.c
+++ b/lapack-netlib/SRC/chesv_rook.c
@@ -0,0 +1,697 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b CHESV_ROOK computes the solution to a system of linear equations A * X = B for HE matrices usin
 g the bounded Bunch-Kaufman ("rook") diagonal pivoting method */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHESV_ROOK + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chesv_r
 ook.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chesv_r
 ook.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chesv_r
 ook.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHESV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, */
 /*                              LWORK, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, LDB, LWORK, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHESV_ROOK computes the solution to a complex system of linear equations */
 /* >    A * X = B, */
 /* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */
 /* > matrices. */
 /* > */
 /* > The bounded Bunch-Kaufman ("rook") diagonal pivoting method is used */
 /* > to factor A as */
 /* >    A = U * D * U**T,  if UPLO = 'U', or */
 /* >    A = L * D * L**T,  if UPLO = 'L', */
 /* > where U (or L) is a product of permutation and unit upper (lower) */
 /* > triangular matrices, and D is Hermitian and block diagonal with */
 /* > 1-by-1 and 2-by-2 diagonal blocks. */
 /* > */
 /* > CHETRF_ROOK is called to compute the factorization of a complex */
 /* > Hermition matrix A using the bounded Bunch-Kaufman ("rook") diagonal */
 /* > pivoting method. */
 /* > */
 /* > The factored form of A is then used to solve the system */
 /* > of equations A * X = B by calling CHETRS_ROOK (uses BLAS 2). */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of linear equations, i.e., the order of the */
 /* >          matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          N-by-N upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading N-by-N lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* > */
 /* >          On exit, if INFO = 0, the block diagonal matrix D and the */
 /* >          multipliers used to obtain the factor U or L from the */
 /* >          factorization A = U*D*U**H or A = L*D*L**H as computed by */
 /* >          CHETRF_ROOK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          Details of the interchanges and the block structure of D. */
 /* > */
 /* >          If UPLO = 'U': */
 /* >             Only the last KB elements of IPIV are set. */
 /* > */
 /* >             If IPIV(k) > 0, then rows and columns k and IPIV(k) were */
 /* >             interchanged and D(k,k) is a 1-by-1 diagonal block. */
 /* > */
 /* >             If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and */
 /* >             columns k and -IPIV(k) were interchanged and rows and */
 /* >             columns k-1 and -IPIV(k-1) were inerchaged, */
 /* >             D(k-1:k,k-1:k) is a 2-by-2 diagonal block. */
 /* > */
 /* >          If UPLO = 'L': */
 /* >             Only the first KB elements of IPIV are set. */
 /* > */
 /* >             If IPIV(k) > 0, then rows and columns k and IPIV(k) */
 /* >             were interchanged and D(k,k) is a 1-by-1 diagonal block. */
 /* > */
 /* >             If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and */
 /* >             columns k and -IPIV(k) were interchanged and rows and */
 /* >             columns k+1 and -IPIV(k+1) were inerchaged, */
 /* >             D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,NRHS) */
 /* >          On entry, the N-by-NRHS right hand side matrix B. */
 /* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of WORK.  LWORK >= 1, and for best performance */
 /* >          LWORK >= f2cmax(1,N*NB), where NB is the optimal blocksize for */
 /* >          CHETRF_ROOK. */
 /* >          for LWORK < N, TRS will be done with Level BLAS 2 */
 /* >          for LWORK >= N, TRS will be done with Level BLAS 3 */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization */
 /* >               has been completed, but the block diagonal matrix D is */
 /* >               exactly singular, so the solution could not be computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2013 */

 /* > \ingroup complexHEsolve */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  November 2013,  Igor Kozachenko, */
 /* >                  Computer Science Division, */
 /* >                  University of California, Berkeley */
 /* > */
 /* >  September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */
 /* >                  School of Mathematics, */
 /* >                  University of Manchester */
 /* > */
 /* > \endverbatim */


 /*  ===================================================================== */
 /* Subroutine */ int chesv_rook_(char *uplo, integer *n, integer *nrhs, 
 	complex *a, integer *lda, integer *ipiv, complex *b, integer *ldb, 
 	complex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;

    /* Local variables */
    extern logical lsame_(char *, char *);
    integer nb;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int chetrf_rook_(char *, integer *, complex *, 
 	    integer *, integer *, complex *, integer *, integer *), 
 	    chetrs_rook_(char *, integer *, integer *, complex *, integer *, 
 	    integer *, complex *, integer *, integer *);


 /*  -- LAPACK driver routine (version 3.5.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2013 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;

    /* Function Body */
    *info = 0;
    lquery = *lwork == -1;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
    } else if (*lwork < 1 && ! lquery) {
 	*info = -10;
    }

    if (*info == 0) {
 	if (*n == 0) {
 	    lwkopt = 1;
 	} else {
 	    nb = ilaenv_(&c__1, "CHETRF_ROOK", uplo, n, &c_n1, &c_n1, &c_n1, (
 		    ftnlen)11, (ftnlen)1);
 	    lwkopt = *n * nb;
 	}
 	work[1].r = (real) lwkopt, work[1].i = 0.f;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHESV_ROOK ", &i__1, (ftnlen)11);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Compute the factorization A = U*D*U**H or A = L*D*L**H. */

    chetrf_rook_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info);
    if (*info == 0) {

 /*        Solve the system A*X = B, overwriting B with X. */

 /*        Solve with TRS ( Use Level BLAS 2) */

 	chetrs_rook_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset]
 		, ldb, info);

    }

    work[1].r = (real) lwkopt, work[1].i = 0.f;

    return 0;

 /*     End of CHESV_ROOK */

 } /* chesv_rook__ */

--- a/lapack-netlib/SRC/chesvx.c
+++ b/lapack-netlib/SRC/chesvx.c
@@ -0,0 +1,844 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief <b> CHESVX computes the solution to system of linear equations A * X = B for HE matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHESVX + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chesvx.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chesvx.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chesvx.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHESVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, */
 /*                          LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, */
 /*                          RWORK, INFO ) */

 /*       CHARACTER          FACT, UPLO */
 /*       INTEGER            INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS */
 /*       REAL               RCOND */
 /*       INTEGER            IPIV( * ) */
 /*       REAL               BERR( * ), FERR( * ), RWORK( * ) */
 /*       COMPLEX            A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */
 /*      $                   WORK( * ), X( LDX, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHESVX uses the diagonal pivoting factorization to compute the */
 /* > solution to a complex system of linear equations A * X = B, */
 /* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */
 /* > matrices. */
 /* > */
 /* > Error bounds on the solution and a condition estimate are also */
 /* > provided. */
 /* > \endverbatim */

 /* > \par Description: */
 /*  ================= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > The following steps are performed: */
 /* > */
 /* > 1. If FACT = 'N', the diagonal pivoting method is used to factor A. */
 /* >    The form of the factorization is */
 /* >       A = U * D * U**H,  if UPLO = 'U', or */
 /* >       A = L * D * L**H,  if UPLO = 'L', */
 /* >    where U (or L) is a product of permutation and unit upper (lower) */
 /* >    triangular matrices, and D is Hermitian and block diagonal with */
 /* >    1-by-1 and 2-by-2 diagonal blocks. */
 /* > */
 /* > 2. If some D(i,i)=0, so that D is exactly singular, then the routine */
 /* >    returns with INFO = i. Otherwise, the factored form of A is used */
 /* >    to estimate the condition number of the matrix A.  If the */
 /* >    reciprocal of the condition number is less than machine precision, */
 /* >    INFO = N+1 is returned as a warning, but the routine still goes on */
 /* >    to solve for X and compute error bounds as described below. */
 /* > */
 /* > 3. The system of equations is solved for X using the factored form */
 /* >    of A. */
 /* > */
 /* > 4. Iterative refinement is applied to improve the computed solution */
 /* >    matrix and calculate error bounds and backward error estimates */
 /* >    for it. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] FACT */
 /* > \verbatim */
 /* >          FACT is CHARACTER*1 */
 /* >          Specifies whether or not the factored form of A has been */
 /* >          supplied on entry. */
 /* >          = 'F':  On entry, AF and IPIV contain the factored form */
 /* >                  of A.  A, AF and IPIV will not be modified. */
 /* >          = 'N':  The matrix A will be copied to AF and factored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of linear equations, i.e., the order of the */
 /* >          matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrices B and X.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          The Hermitian matrix A.  If UPLO = 'U', the leading N-by-N */
 /* >          upper triangular part of A contains the upper triangular part */
 /* >          of the matrix A, and the strictly lower triangular part of A */
 /* >          is not referenced.  If UPLO = 'L', the leading N-by-N lower */
 /* >          triangular part of A contains the lower triangular part of */
 /* >          the matrix A, and the strictly upper triangular part of A is */
 /* >          not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AF */
 /* > \verbatim */
 /* >          AF is COMPLEX array, dimension (LDAF,N) */
 /* >          If FACT = 'F', then AF is an input argument and on entry */
 /* >          contains the block diagonal matrix D and the multipliers used */
 /* >          to obtain the factor U or L from the factorization */
 /* >          A = U*D*U**H or A = L*D*L**H as computed by CHETRF. */
 /* > */
 /* >          If FACT = 'N', then AF is an output argument and on exit */
 /* >          returns the block diagonal matrix D and the multipliers used */
 /* >          to obtain the factor U or L from the factorization */
 /* >          A = U*D*U**H or A = L*D*L**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAF */
 /* > \verbatim */
 /* >          LDAF is INTEGER */
 /* >          The leading dimension of the array AF.  LDAF >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          If FACT = 'F', then IPIV is an input argument and on entry */
 /* >          contains details of the interchanges and the block structure */
 /* >          of D, as determined by CHETRF. */
 /* >          If IPIV(k) > 0, then rows and columns k and IPIV(k) were */
 /* >          interchanged and D(k,k) is a 1-by-1 diagonal block. */
 /* >          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */
 /* >          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */
 /* >          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) = */
 /* >          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */
 /* >          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */
 /* > */
 /* >          If FACT = 'N', then IPIV is an output argument and on exit */
 /* >          contains details of the interchanges and the block structure */
 /* >          of D, as determined by CHETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] B */
 /* > \verbatim */
 /* >          B is COMPLEX array, dimension (LDB,NRHS) */
 /* >          The N-by-NRHS right hand side matrix B. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] X */
 /* > \verbatim */
 /* >          X is COMPLEX array, dimension (LDX,NRHS) */
 /* >          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDX */
 /* > \verbatim */
 /* >          LDX is INTEGER */
 /* >          The leading dimension of the array X.  LDX >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is REAL */
 /* >          The estimate of the reciprocal condition number of the matrix */
 /* >          A.  If RCOND is less than the machine precision (in */
 /* >          particular, if RCOND = 0), the matrix is singular to working */
 /* >          precision.  This condition is indicated by a return code of */
 /* >          INFO > 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] FERR */
 /* > \verbatim */
 /* >          FERR is REAL array, dimension (NRHS) */
 /* >          The estimated forward error bound for each solution vector */
 /* >          X(j) (the j-th column of the solution matrix X). */
 /* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
 /* >          is an estimated upper bound for the magnitude of the largest */
 /* >          element in (X(j) - XTRUE) divided by the magnitude of the */
 /* >          largest element in X(j).  The estimate is as reliable as */
 /* >          the estimate for RCOND, and is almost always a slight */
 /* >          overestimate of the true error. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] BERR */
 /* > \verbatim */
 /* >          BERR is REAL array, dimension (NRHS) */
 /* >          The componentwise relative backward error of each solution */
 /* >          vector X(j) (i.e., the smallest relative change in */
 /* >          any element of A or B that makes X(j) an exact solution). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of WORK.  LWORK >= f2cmax(1,2*N), and for best */
 /* >          performance, when FACT = 'N', LWORK >= f2cmax(1,2*N,N*NB), where */
 /* >          NB is the optimal blocksize for CHETRF. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is REAL array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0: if INFO = i, and i is */
 /* >                <= N:  D(i,i) is exactly zero.  The factorization */
 /* >                       has been completed but the factor D is exactly */
 /* >                       singular, so the solution and error bounds could */
 /* >                       not be computed. RCOND = 0 is returned. */
 /* >                = N+1: D is nonsingular, but RCOND is less than machine */
 /* >                       precision, meaning that the matrix is singular */
 /* >                       to working precision.  Nevertheless, the */
 /* >                       solution and error bounds are computed because */
 /* >                       there are a number of situations where the */
 /* >                       computed solution can be more accurate than the */
 /* >                       value of RCOND would suggest. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date April 2012 */

 /* > \ingroup complexHEsolve */

 /*  ===================================================================== */
 /* Subroutine */ int chesvx_(char *fact, char *uplo, integer *n, integer *
 	nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer *
 	ipiv, complex *b, integer *ldb, complex *x, integer *ldx, real *rcond,
 	 real *ferr, real *berr, complex *work, integer *lwork, real *rwork, 
 	integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
 	    x_offset, i__1, i__2;

    /* Local variables */
    extern logical lsame_(char *, char *);
    real anorm;
    integer nb;
    extern real clanhe_(char *, char *, integer *, complex *, integer *, real 
 	    *);
    extern /* Subroutine */ int checon_(char *, integer *, complex *, integer 
 	    *, integer *, real *, real *, complex *, integer *);
    extern real slamch_(char *);
    logical nofact;
    extern /* Subroutine */ int cherfs_(char *, integer *, integer *, complex 
 	    *, integer *, complex *, integer *, integer *, complex *, integer 
 	    *, complex *, integer *, real *, real *, complex *, real *, 
 	    integer *), chetrf_(char *, integer *, complex *, integer 
 	    *, integer *, complex *, integer *, integer *), clacpy_(
 	    char *, integer *, integer *, complex *, integer *, complex *, 
 	    integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), chetrs_(
 	    char *, integer *, integer *, complex *, integer *, integer *, 
 	    complex *, integer *, integer *);
    integer lwkopt;
    logical lquery;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     April 2012 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    af_dim1 = *ldaf;
    af_offset = 1 + af_dim1 * 1;
    af -= af_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --rwork;

    /* Function Body */
    *info = 0;
    nofact = lsame_(fact, "N");
    lquery = *lwork == -1;
    if (! nofact && ! lsame_(fact, "F")) {
 	*info = -1;
    } else if (! lsame_(uplo, "U") && ! lsame_(uplo, 
 	    "L")) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*nrhs < 0) {
 	*info = -4;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -6;
    } else if (*ldaf < f2cmax(1,*n)) {
 	*info = -8;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -11;
    } else if (*ldx < f2cmax(1,*n)) {
 	*info = -13;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = 1, i__2 = *n << 1;
 	if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
 	    *info = -18;
 	}
    }

    if (*info == 0) {
 /* Computing MAX */
 	i__1 = 1, i__2 = *n << 1;
 	lwkopt = f2cmax(i__1,i__2);
 	if (nofact) {
 	    nb = ilaenv_(&c__1, "CHETRF", uplo, n, &c_n1, &c_n1, &c_n1, (
 		    ftnlen)6, (ftnlen)1);
 /* Computing MAX */
 	    i__1 = lwkopt, i__2 = *n * nb;
 	    lwkopt = f2cmax(i__1,i__2);
 	}
 	work[1].r = (real) lwkopt, work[1].i = 0.f;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHESVX", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

    if (nofact) {

 /*        Compute the factorization A = U*D*U**H or A = L*D*L**H. */

 	clacpy_(uplo, n, n, &a[a_offset], lda, &af[af_offset], ldaf);
 	chetrf_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &work[1], lwork, 
 		info);

 /*        Return if INFO is non-zero. */

 	if (*info > 0) {
 	    *rcond = 0.f;
 	    return 0;
 	}
    }

 /*     Compute the norm of the matrix A. */

    anorm = clanhe_("I", uplo, n, &a[a_offset], lda, &rwork[1]);

 /*     Compute the reciprocal of the condition number of A. */

    checon_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &anorm, rcond, &work[1], 
 	    info);

 /*     Compute the solution vectors X. */

    clacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx);
    chetrs_(uplo, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, 
 	    info);

 /*     Use iterative refinement to improve the computed solutions and */
 /*     compute error bounds and backward error estimates for them. */

    cherfs_(uplo, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &ipiv[1], 
 	    &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[1]
 	    , &rwork[1], info);

 /*     Set INFO = N+1 if the matrix is singular to working precision. */

    if (*rcond < slamch_("Epsilon")) {
 	*info = *n + 1;
    }

    work[1].r = (real) lwkopt, work[1].i = 0.f;

    return 0;

 /*     End of CHESVX */

 } /* chesvx_ */

--- a/lapack-netlib/SRC/chesvxx.c
+++ b/lapack-netlib/SRC/chesvxx.c
--- a/lapack-netlib/SRC/cheswapr.c
+++ b/lapack-netlib/SRC/cheswapr.c
@@ -0,0 +1,630 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b CHESWAPR applies an elementary permutation on the rows and columns of a Hermitian matrix. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHESWAPR + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cheswap
 r.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cheswap
 r.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cheswap
 r.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHESWAPR( UPLO, N, A, LDA, I1, I2) */

 /*       CHARACTER        UPLO */
 /*       INTEGER          I1, I2, LDA, N */
 /*       COMPLEX          A( LDA, N ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHESWAPR applies an elementary permutation on the rows and the columns of */
 /* > a hermitian matrix. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          Specifies whether the details of the factorization are stored */
 /* >          as an upper or lower triangular matrix. */
 /* >          = 'U':  Upper triangular, form is A = U*D*U**T; */
 /* >          = 'L':  Lower triangular, form is A = L*D*L**T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          On entry, the NB diagonal matrix D and the multipliers */
 /* >          used to obtain the factor U or L as computed by CSYTRF. */
 /* > */
 /* >          On exit, if INFO = 0, the (symmetric) inverse of the original */
 /* >          matrix.  If UPLO = 'U', the upper triangular part of the */
 /* >          inverse is formed and the part of A below the diagonal is not */
 /* >          referenced; if UPLO = 'L' the lower triangular part of the */
 /* >          inverse is formed and the part of A above the diagonal is */
 /* >          not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] I1 */
 /* > \verbatim */
 /* >          I1 is INTEGER */
 /* >          Index of the first row to swap */
 /* > \endverbatim */
 /* > */
 /* > \param[in] I2 */
 /* > \verbatim */
 /* >          I2 is INTEGER */
 /* >          Index of the second row to swap */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexHEauxiliary */

 /*  ===================================================================== */
 /* Subroutine */ int cheswapr_(char *uplo, integer *n, complex *a, integer *
 	lda, integer *i1, integer *i2)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    complex q__1;

    /* Local variables */
    integer i__;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int cswap_(integer *, complex *, integer *, 
 	    complex *, integer *);
    logical upper;
    complex tmp;


 /*  -- LAPACK auxiliary routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */



    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;

    /* Function Body */
    upper = lsame_(uplo, "U");
    if (upper) {

 /*         UPPER */
 /*         first swap */
 /*          - swap column I1 and I2 from I1 to I1-1 */
 	i__1 = *i1 - 1;
 	cswap_(&i__1, &a[*i1 * a_dim1 + 1], &c__1, &a[*i2 * a_dim1 + 1], &
 		c__1);

 /*          second swap : */
 /*          - swap A(I1,I1) and A(I2,I2) */
 /*          - swap row I1 from I1+1 to I2-1 with col I2 from I1+1 to I2-1 */
 /*          - swap A(I2,I1) and A(I1,I2) */
 	i__1 = *i1 + *i1 * a_dim1;
 	tmp.r = a[i__1].r, tmp.i = a[i__1].i;
 	i__1 = *i1 + *i1 * a_dim1;
 	i__2 = *i2 + *i2 * a_dim1;
 	a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i;
 	i__1 = *i2 + *i2 * a_dim1;
 	a[i__1].r = tmp.r, a[i__1].i = tmp.i;

 	i__1 = *i2 - *i1 - 1;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = *i1 + (*i1 + i__) * a_dim1;
 	    tmp.r = a[i__2].r, tmp.i = a[i__2].i;
 	    i__2 = *i1 + (*i1 + i__) * a_dim1;
 	    r_cnjg(&q__1, &a[*i1 + i__ + *i2 * a_dim1]);
 	    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
 	    i__2 = *i1 + i__ + *i2 * a_dim1;
 	    r_cnjg(&q__1, &tmp);
 	    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
 	}

 	i__1 = *i1 + *i2 * a_dim1;
 	r_cnjg(&q__1, &a[*i1 + *i2 * a_dim1]);
 	a[i__1].r = q__1.r, a[i__1].i = q__1.i;

 /*          third swap */
 /*          - swap row I1 and I2 from I2+1 to N */
 	i__1 = *n;
 	for (i__ = *i2 + 1; i__ <= i__1; ++i__) {
 	    i__2 = *i1 + i__ * a_dim1;
 	    tmp.r = a[i__2].r, tmp.i = a[i__2].i;
 	    i__2 = *i1 + i__ * a_dim1;
 	    i__3 = *i2 + i__ * a_dim1;
 	    a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i;
 	    i__2 = *i2 + i__ * a_dim1;
 	    a[i__2].r = tmp.r, a[i__2].i = tmp.i;
 	}

    } else {

 /*         LOWER */
 /*         first swap */
 /*          - swap row I1 and I2 from 1 to I1-1 */
 	i__1 = *i1 - 1;
 	cswap_(&i__1, &a[*i1 + a_dim1], lda, &a[*i2 + a_dim1], lda);

 /*         second swap : */
 /*          - swap A(I1,I1) and A(I2,I2) */
 /*          - swap col I1 from I1+1 to I2-1 with row I2 from I1+1 to I2-1 */
 /*          - swap A(I2,I1) and A(I1,I2) */
 	i__1 = *i1 + *i1 * a_dim1;
 	tmp.r = a[i__1].r, tmp.i = a[i__1].i;
 	i__1 = *i1 + *i1 * a_dim1;
 	i__2 = *i2 + *i2 * a_dim1;
 	a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i;
 	i__1 = *i2 + *i2 * a_dim1;
 	a[i__1].r = tmp.r, a[i__1].i = tmp.i;

 	i__1 = *i2 - *i1 - 1;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = *i1 + i__ + *i1 * a_dim1;
 	    tmp.r = a[i__2].r, tmp.i = a[i__2].i;
 	    i__2 = *i1 + i__ + *i1 * a_dim1;
 	    r_cnjg(&q__1, &a[*i2 + (*i1 + i__) * a_dim1]);
 	    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
 	    i__2 = *i2 + (*i1 + i__) * a_dim1;
 	    r_cnjg(&q__1, &tmp);
 	    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
 	}

 	i__1 = *i2 + *i1 * a_dim1;
 	r_cnjg(&q__1, &a[*i2 + *i1 * a_dim1]);
 	a[i__1].r = q__1.r, a[i__1].i = q__1.i;

 /*         third swap */
 /*          - swap col I1 and I2 from I2+1 to N */
 	i__1 = *n;
 	for (i__ = *i2 + 1; i__ <= i__1; ++i__) {
 	    i__2 = i__ + *i1 * a_dim1;
 	    tmp.r = a[i__2].r, tmp.i = a[i__2].i;
 	    i__2 = i__ + *i1 * a_dim1;
 	    i__3 = i__ + *i2 * a_dim1;
 	    a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i;
 	    i__2 = i__ + *i2 * a_dim1;
 	    a[i__2].r = tmp.r, a[i__2].i = tmp.i;
 	}

    }
    return 0;
 } /* cheswapr_ */

--- a/lapack-netlib/SRC/chetd2.c
+++ b/lapack-netlib/SRC/chetd2.c
@@ -0,0 +1,793 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static complex c_b2 = {0.f,0.f};
 static integer c__1 = 1;

 /* > \brief \b CHETD2 reduces a Hermitian matrix to real symmetric tridiagonal form by an unitary similarity t
 ransformation (unblocked algorithm). */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHETD2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetd2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetd2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetd2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHETD2( UPLO, N, A, LDA, D, E, TAU, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, N */
 /*       REAL               D( * ), E( * ) */
 /*       COMPLEX            A( LDA, * ), TAU( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHETD2 reduces a complex Hermitian matrix A to real symmetric */
 /* > tridiagonal form T by a unitary similarity transformation: */
 /* > Q**H * A * Q = T. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          Specifies whether the upper or lower triangular part of the */
 /* >          Hermitian matrix A is stored: */
 /* >          = 'U':  Upper triangular */
 /* >          = 'L':  Lower triangular */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          n-by-n upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading n-by-n lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* >          On exit, if UPLO = 'U', the diagonal and first superdiagonal */
 /* >          of A are overwritten by the corresponding elements of the */
 /* >          tridiagonal matrix T, and the elements above the first */
 /* >          superdiagonal, with the array TAU, represent the unitary */
 /* >          matrix Q as a product of elementary reflectors; if UPLO */
 /* >          = 'L', the diagonal and first subdiagonal of A are over- */
 /* >          written by the corresponding elements of the tridiagonal */
 /* >          matrix T, and the elements below the first subdiagonal, with */
 /* >          the array TAU, represent the unitary matrix Q as a product */
 /* >          of elementary reflectors. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] D */
 /* > \verbatim */
 /* >          D is REAL array, dimension (N) */
 /* >          The diagonal elements of the tridiagonal matrix T: */
 /* >          D(i) = A(i,i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] E */
 /* > \verbatim */
 /* >          E is REAL array, dimension (N-1) */
 /* >          The off-diagonal elements of the tridiagonal matrix T: */
 /* >          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is COMPLEX array, dimension (N-1) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexHEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  If UPLO = 'U', the matrix Q is represented as a product of elementary */
 /* >  reflectors */
 /* > */
 /* >     Q = H(n-1) . . . H(2) H(1). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**H */
 /* > */
 /* >  where tau is a complex scalar, and v is a complex vector with */
 /* >  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */
 /* >  A(1:i-1,i+1), and tau in TAU(i). */
 /* > */
 /* >  If UPLO = 'L', the matrix Q is represented as a product of elementary */
 /* >  reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(n-1). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**H */
 /* > */
 /* >  where tau is a complex scalar, and v is a complex vector with */
 /* >  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */
 /* >  and tau in TAU(i). */
 /* > */
 /* >  The contents of A on exit are illustrated by the following examples */
 /* >  with n = 5: */
 /* > */
 /* >  if UPLO = 'U':                       if UPLO = 'L': */
 /* > */
 /* >    (  d   e   v2  v3  v4 )              (  d                  ) */
 /* >    (      d   e   v3  v4 )              (  e   d              ) */
 /* >    (          d   e   v4 )              (  v1  e   d          ) */
 /* >    (              d   e  )              (  v1  v2  e   d      ) */
 /* >    (                  d  )              (  v1  v2  v3  e   d  ) */
 /* > */
 /* >  where d and e denote diagonal and off-diagonal elements of T, and vi */
 /* >  denotes an element of the vector defining H(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int chetd2_(char *uplo, integer *n, complex *a, integer *lda,
 	 real *d__, real *e, complex *tau, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    real r__1;
    complex q__1, q__2, q__3, q__4;

    /* Local variables */
    complex taui;
    extern /* Subroutine */ int cher2_(char *, integer *, complex *, complex *
 	    , integer *, complex *, integer *, complex *, integer *);
    integer i__;
    complex alpha;
    extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer 
 	    *, complex *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int chemv_(char *, integer *, complex *, complex *
 	    , integer *, complex *, integer *, complex *, complex *, integer *
 	    ), caxpy_(integer *, complex *, complex *, integer *, 
 	    complex *, integer *);
    logical upper;
    extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, 
 	    integer *, complex *), xerbla_(char *, integer *, ftnlen);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --d__;
    --e;
    --tau;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHETD2", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n <= 0) {
 	return 0;
    }

    if (upper) {

 /*        Reduce the upper triangle of A */

 	i__1 = *n + *n * a_dim1;
 	i__2 = *n + *n * a_dim1;
 	r__1 = a[i__2].r;
 	a[i__1].r = r__1, a[i__1].i = 0.f;
 	for (i__ = *n - 1; i__ >= 1; --i__) {

 /*           Generate elementary reflector H(i) = I - tau * v * v**H */
 /*           to annihilate A(1:i-1,i+1) */

 	    i__1 = i__ + (i__ + 1) * a_dim1;
 	    alpha.r = a[i__1].r, alpha.i = a[i__1].i;
 	    clarfg_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui);
 	    i__1 = i__;
 	    e[i__1] = alpha.r;

 	    if (taui.r != 0.f || taui.i != 0.f) {

 /*              Apply H(i) from both sides to A(1:i,1:i) */

 		i__1 = i__ + (i__ + 1) * a_dim1;
 		a[i__1].r = 1.f, a[i__1].i = 0.f;

 /*              Compute  x := tau * A * v  storing x in TAU(1:i) */

 		chemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * 
 			a_dim1 + 1], &c__1, &c_b2, &tau[1], &c__1);

 /*              Compute  w := x - 1/2 * tau * (x**H * v) * v */

 		q__3.r = -.5f, q__3.i = 0.f;
 		q__2.r = q__3.r * taui.r - q__3.i * taui.i, q__2.i = q__3.r * 
 			taui.i + q__3.i * taui.r;
 		cdotc_(&q__4, &i__, &tau[1], &c__1, &a[(i__ + 1) * a_dim1 + 1]
 			, &c__1);
 		q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * 
 			q__4.i + q__2.i * q__4.r;
 		alpha.r = q__1.r, alpha.i = q__1.i;
 		caxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[
 			1], &c__1);

 /*              Apply the transformation as a rank-2 update: */
 /*                 A := A - v * w**H - w * v**H */

 		q__1.r = -1.f, q__1.i = 0.f;
 		cher2_(uplo, &i__, &q__1, &a[(i__ + 1) * a_dim1 + 1], &c__1, &
 			tau[1], &c__1, &a[a_offset], lda);

 	    } else {
 		i__1 = i__ + i__ * a_dim1;
 		i__2 = i__ + i__ * a_dim1;
 		r__1 = a[i__2].r;
 		a[i__1].r = r__1, a[i__1].i = 0.f;
 	    }
 	    i__1 = i__ + (i__ + 1) * a_dim1;
 	    i__2 = i__;
 	    a[i__1].r = e[i__2], a[i__1].i = 0.f;
 	    i__1 = i__ + 1;
 	    i__2 = i__ + 1 + (i__ + 1) * a_dim1;
 	    d__[i__1] = a[i__2].r;
 	    i__1 = i__;
 	    tau[i__1].r = taui.r, tau[i__1].i = taui.i;
 /* L10: */
 	}
 	i__1 = a_dim1 + 1;
 	d__[1] = a[i__1].r;
    } else {

 /*        Reduce the lower triangle of A */

 	i__1 = a_dim1 + 1;
 	i__2 = a_dim1 + 1;
 	r__1 = a[i__2].r;
 	a[i__1].r = r__1, a[i__1].i = 0.f;
 	i__1 = *n - 1;
 	for (i__ = 1; i__ <= i__1; ++i__) {

 /*           Generate elementary reflector H(i) = I - tau * v * v**H */
 /*           to annihilate A(i+2:n,i) */

 	    i__2 = i__ + 1 + i__ * a_dim1;
 	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
 	    i__2 = *n - i__;
 /* Computing MIN */
 	    i__3 = i__ + 2;
 	    clarfg_(&i__2, &alpha, &a[f2cmin(i__3,*n) + i__ * a_dim1], &c__1, &
 		    taui);
 	    i__2 = i__;
 	    e[i__2] = alpha.r;

 	    if (taui.r != 0.f || taui.i != 0.f) {

 /*              Apply H(i) from both sides to A(i+1:n,i+1:n) */

 		i__2 = i__ + 1 + i__ * a_dim1;
 		a[i__2].r = 1.f, a[i__2].i = 0.f;

 /*              Compute  x := tau * A * v  storing y in TAU(i:n-1) */

 		i__2 = *n - i__;
 		chemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], 
 			lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b2, &tau[
 			i__], &c__1);

 /*              Compute  w := x - 1/2 * tau * (x**H * v) * v */

 		q__3.r = -.5f, q__3.i = 0.f;
 		q__2.r = q__3.r * taui.r - q__3.i * taui.i, q__2.i = q__3.r * 
 			taui.i + q__3.i * taui.r;
 		i__2 = *n - i__;
 		cdotc_(&q__4, &i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ * 
 			a_dim1], &c__1);
 		q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * 
 			q__4.i + q__2.i * q__4.r;
 		alpha.r = q__1.r, alpha.i = q__1.i;
 		i__2 = *n - i__;
 		caxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
 			i__], &c__1);

 /*              Apply the transformation as a rank-2 update: */
 /*                 A := A - v * w**H - w * v**H */

 		i__2 = *n - i__;
 		q__1.r = -1.f, q__1.i = 0.f;
 		cher2_(uplo, &i__2, &q__1, &a[i__ + 1 + i__ * a_dim1], &c__1, 
 			&tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], 
 			lda);

 	    } else {
 		i__2 = i__ + 1 + (i__ + 1) * a_dim1;
 		i__3 = i__ + 1 + (i__ + 1) * a_dim1;
 		r__1 = a[i__3].r;
 		a[i__2].r = r__1, a[i__2].i = 0.f;
 	    }
 	    i__2 = i__ + 1 + i__ * a_dim1;
 	    i__3 = i__;
 	    a[i__2].r = e[i__3], a[i__2].i = 0.f;
 	    i__2 = i__;
 	    i__3 = i__ + i__ * a_dim1;
 	    d__[i__2] = a[i__3].r;
 	    i__2 = i__;
 	    tau[i__2].r = taui.r, tau[i__2].i = taui.i;
 /* L20: */
 	}
 	i__1 = *n;
 	i__2 = *n + *n * a_dim1;
 	d__[i__1] = a[i__2].r;
    }

    return 0;

 /*     End of CHETD2 */

 } /* chetd2_ */

--- a/lapack-netlib/SRC/chetf2.c
+++ b/lapack-netlib/SRC/chetf2.c
--- a/lapack-netlib/SRC/chetf2_rk.c
+++ b/lapack-netlib/SRC/chetf2_rk.c
--- a/lapack-netlib/SRC/chetf2_rook.c
+++ b/lapack-netlib/SRC/chetf2_rook.c
--- a/lapack-netlib/SRC/chetrd.c
+++ b/lapack-netlib/SRC/chetrd.c
@@ -0,0 +1,814 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;
 static real c_b23 = 1.f;

 /* > \brief \b CHETRD */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHETRD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, LWORK, N */
 /*       REAL               D( * ), E( * ) */
 /*       COMPLEX            A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHETRD reduces a complex Hermitian matrix A to real symmetric */
 /* > tridiagonal form T by a unitary similarity transformation: */
 /* > Q**H * A * Q = T. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          N-by-N upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading N-by-N lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* >          On exit, if UPLO = 'U', the diagonal and first superdiagonal */
 /* >          of A are overwritten by the corresponding elements of the */
 /* >          tridiagonal matrix T, and the elements above the first */
 /* >          superdiagonal, with the array TAU, represent the unitary */
 /* >          matrix Q as a product of elementary reflectors; if UPLO */
 /* >          = 'L', the diagonal and first subdiagonal of A are over- */
 /* >          written by the corresponding elements of the tridiagonal */
 /* >          matrix T, and the elements below the first subdiagonal, with */
 /* >          the array TAU, represent the unitary matrix Q as a product */
 /* >          of elementary reflectors. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] D */
 /* > \verbatim */
 /* >          D is REAL array, dimension (N) */
 /* >          The diagonal elements of the tridiagonal matrix T: */
 /* >          D(i) = A(i,i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] E */
 /* > \verbatim */
 /* >          E is REAL array, dimension (N-1) */
 /* >          The off-diagonal elements of the tridiagonal matrix T: */
 /* >          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is COMPLEX array, dimension (N-1) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK.  LWORK >= 1. */
 /* >          For optimum performance LWORK >= N*NB, where NB is the */
 /* >          optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complexHEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  If UPLO = 'U', the matrix Q is represented as a product of elementary */
 /* >  reflectors */
 /* > */
 /* >     Q = H(n-1) . . . H(2) H(1). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**H */
 /* > */
 /* >  where tau is a complex scalar, and v is a complex vector with */
 /* >  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */
 /* >  A(1:i-1,i+1), and tau in TAU(i). */
 /* > */
 /* >  If UPLO = 'L', the matrix Q is represented as a product of elementary */
 /* >  reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(n-1). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**H */
 /* > */
 /* >  where tau is a complex scalar, and v is a complex vector with */
 /* >  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */
 /* >  and tau in TAU(i). */
 /* > */
 /* >  The contents of A on exit are illustrated by the following examples */
 /* >  with n = 5: */
 /* > */
 /* >  if UPLO = 'U':                       if UPLO = 'L': */
 /* > */
 /* >    (  d   e   v2  v3  v4 )              (  d                  ) */
 /* >    (      d   e   v3  v4 )              (  e   d              ) */
 /* >    (          d   e   v4 )              (  v1  e   d          ) */
 /* >    (              d   e  )              (  v1  v2  e   d      ) */
 /* >    (                  d  )              (  v1  v2  v3  e   d  ) */
 /* > */
 /* >  where d and e denote diagonal and off-diagonal elements of T, and vi */
 /* >  denotes an element of the vector defining H(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int chetrd_(char *uplo, integer *n, complex *a, integer *lda,
 	 real *d__, real *e, complex *tau, complex *work, integer *lwork, 
 	integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    complex q__1;

    /* Local variables */
    integer i__, j;
    extern logical lsame_(char *, char *);
    integer nbmin, iinfo;
    logical upper;
    extern /* Subroutine */ int chetd2_(char *, integer *, complex *, integer 
 	    *, real *, real *, complex *, integer *), cher2k_(char *, 
 	    char *, integer *, integer *, complex *, complex *, integer *, 
 	    complex *, integer *, real *, complex *, integer *);
    integer nb, kk, nx;
    extern /* Subroutine */ int clatrd_(char *, integer *, integer *, complex 
 	    *, integer *, real *, complex *, complex *, integer *), 
 	    xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer ldwork, lwkopt;
    logical lquery;
    integer iws;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --d__;
    --e;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1;
    if (! upper && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    } else if (*lwork < 1 && ! lquery) {
 	*info = -9;
    }

    if (*info == 0) {

 /*        Determine the block size. */

 	nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
 		 (ftnlen)1);
 	lwkopt = *n * nb;
 	work[1].r = (real) lwkopt, work[1].i = 0.f;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHETRD", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	work[1].r = 1.f, work[1].i = 0.f;
 	return 0;
    }

    nx = *n;
    iws = 1;
    if (nb > 1 && nb < *n) {

 /*        Determine when to cross over from blocked to unblocked code */
 /*        (last block is always handled by unblocked code). */

 /* Computing MAX */
 	i__1 = nb, i__2 = ilaenv_(&c__3, "CHETRD", uplo, n, &c_n1, &c_n1, &
 		c_n1, (ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);
 	if (nx < *n) {

 /*           Determine if workspace is large enough for blocked code. */

 	    ldwork = *n;
 	    iws = ldwork * nb;
 	    if (*lwork < iws) {

 /*              Not enough workspace to use optimal NB:  determine the */
 /*              minimum value of NB, and reduce NB or force use of */
 /*              unblocked code by setting NX = N. */

 /* Computing MAX */
 		i__1 = *lwork / ldwork;
 		nb = f2cmax(i__1,1);
 		nbmin = ilaenv_(&c__2, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1,
 			 (ftnlen)6, (ftnlen)1);
 		if (nb < nbmin) {
 		    nx = *n;
 		}
 	    }
 	} else {
 	    nx = *n;
 	}
    } else {
 	nb = 1;
    }

    if (upper) {

 /*        Reduce the upper triangle of A. */
 /*        Columns 1:kk are handled by the unblocked method. */

 	kk = *n - (*n - nx + nb - 1) / nb * nb;
 	i__1 = kk + 1;
 	i__2 = -nb;
 	for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 
 		i__2) {

 /*           Reduce columns i:i+nb-1 to tridiagonal form and form the */
 /*           matrix W which is needed to update the unreduced part of */
 /*           the matrix */

 	    i__3 = i__ + nb - 1;
 	    clatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], &
 		    work[1], &ldwork);

 /*           Update the unreduced submatrix A(1:i-1,1:i-1), using an */
 /*           update of the form:  A := A - V*W**H - W*V**H */

 	    i__3 = i__ - 1;
 	    q__1.r = -1.f, q__1.i = 0.f;
 	    cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ * a_dim1 
 		    + 1], lda, &work[1], &ldwork, &c_b23, &a[a_offset], lda);

 /*           Copy superdiagonal elements back into A, and diagonal */
 /*           elements into D */

 	    i__3 = i__ + nb - 1;
 	    for (j = i__; j <= i__3; ++j) {
 		i__4 = j - 1 + j * a_dim1;
 		i__5 = j - 1;
 		a[i__4].r = e[i__5], a[i__4].i = 0.f;
 		i__4 = j;
 		i__5 = j + j * a_dim1;
 		d__[i__4] = a[i__5].r;
 /* L10: */
 	    }
 /* L20: */
 	}

 /*        Use unblocked code to reduce the last or only block */

 	chetd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo);
    } else {

 /*        Reduce the lower triangle of A */

 	i__2 = *n - nx;
 	i__1 = nb;
 	for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {

 /*           Reduce columns i:i+nb-1 to tridiagonal form and form the */
 /*           matrix W which is needed to update the unreduced part of */
 /*           the matrix */

 	    i__3 = *n - i__ + 1;
 	    clatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], &
 		    tau[i__], &work[1], &ldwork);

 /*           Update the unreduced submatrix A(i+nb:n,i+nb:n), using */
 /*           an update of the form:  A := A - V*W**H - W*V**H */

 	    i__3 = *n - i__ - nb + 1;
 	    q__1.r = -1.f, q__1.i = 0.f;
 	    cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ + nb + 
 		    i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b23, &a[
 		    i__ + nb + (i__ + nb) * a_dim1], lda);

 /*           Copy subdiagonal elements back into A, and diagonal */
 /*           elements into D */

 	    i__3 = i__ + nb - 1;
 	    for (j = i__; j <= i__3; ++j) {
 		i__4 = j + 1 + j * a_dim1;
 		i__5 = j;
 		a[i__4].r = e[i__5], a[i__4].i = 0.f;
 		i__4 = j;
 		i__5 = j + j * a_dim1;
 		d__[i__4] = a[i__5].r;
 /* L30: */
 	    }
 /* L40: */
 	}

 /*        Use unblocked code to reduce the last or only block */

 	i__1 = *n - i__ + 1;
 	chetd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], 
 		&tau[i__], &iinfo);
    }

    work[1].r = (real) lwkopt, work[1].i = 0.f;
    return 0;

 /*     End of CHETRD */

 } /* chetrd_ */

--- a/lapack-netlib/SRC/chetrd_2stage.c
+++ b/lapack-netlib/SRC/chetrd_2stage.c
@@ -0,0 +1,744 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__2 = 2;
 static integer c__3 = 3;
 static integer c__4 = 4;

 /* > \brief \b CHETRD_2STAGE */

 /*  @generated from zhetrd_2stage.f, fortran z -> c, Sun Nov  6 19:34:06 2016 */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHETRD_2STAGE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrd_
 2stage.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrd_
 2stage.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrd_
 2stage.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHETRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, */
 /*                                 HOUS2, LHOUS2, WORK, LWORK, INFO ) */

 /*       IMPLICIT NONE */

 /*       CHARACTER          VECT, UPLO */
 /*       INTEGER            N, LDA, LWORK, LHOUS2, INFO */
 /*       REAL               D( * ), E( * ) */
 /*       COMPLEX            A( LDA, * ), TAU( * ), */
 /*                          HOUS2( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHETRD_2STAGE reduces a complex Hermitian matrix A to real symmetric */
 /* > tridiagonal form T by a unitary similarity transformation: */
 /* > Q1**H Q2**H* A * Q2 * Q1 = T. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] VECT */
 /* > \verbatim */
 /* >          VECT is CHARACTER*1 */
 /* >          = 'N':  No need for the Housholder representation, */
 /* >                  in particular for the second stage (Band to */
 /* >                  tridiagonal) and thus LHOUS2 is of size f2cmax(1, 4*N); */
 /* >          = 'V':  the Householder representation is needed to */
 /* >                  either generate Q1 Q2 or to apply Q1 Q2, */
 /* >                  then LHOUS2 is to be queried and computed. */
 /* >                  (NOT AVAILABLE IN THIS RELEASE). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          N-by-N upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading N-by-N lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* >          On exit, if UPLO = 'U', the band superdiagonal */
 /* >          of A are overwritten by the corresponding elements of the */
 /* >          internal band-diagonal matrix AB, and the elements above */
 /* >          the KD superdiagonal, with the array TAU, represent the unitary */
 /* >          matrix Q1 as a product of elementary reflectors; if UPLO */
 /* >          = 'L', the diagonal and band subdiagonal of A are over- */
 /* >          written by the corresponding elements of the internal band-diagonal */
 /* >          matrix AB, and the elements below the KD subdiagonal, with */
 /* >          the array TAU, represent the unitary matrix Q1 as a product */
 /* >          of elementary reflectors. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] D */
 /* > \verbatim */
 /* >          D is REAL array, dimension (N) */
 /* >          The diagonal elements of the tridiagonal matrix T. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] E */
 /* > \verbatim */
 /* >          E is REAL array, dimension (N-1) */
 /* >          The off-diagonal elements of the tridiagonal matrix T. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is COMPLEX array, dimension (N-KD) */
 /* >          The scalar factors of the elementary reflectors of */
 /* >          the first stage (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] HOUS2 */
 /* > \verbatim */
 /* >          HOUS2 is COMPLEX array, dimension (LHOUS2) */
 /* >          Stores the Householder representation of the stage2 */
 /* >          band to tridiagonal. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LHOUS2 */
 /* > \verbatim */
 /* >          LHOUS2 is INTEGER */
 /* >          The dimension of the array HOUS2. */
 /* >          If LWORK = -1, or LHOUS2=-1, */
 /* >          then a query is assumed; the routine */
 /* >          only calculates the optimal size of the HOUS2 array, returns */
 /* >          this value as the first entry of the HOUS2 array, and no error */
 /* >          message related to LHOUS2 is issued by XERBLA. */
 /* >          If VECT='N', LHOUS2 = f2cmax(1, 4*n); */
 /* >          if VECT='V', option not yet available. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (LWORK) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. LWORK = MAX(1, dimension) */
 /* >          If LWORK = -1, or LHOUS2 = -1, */
 /* >          then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* >          LWORK = MAX(1, dimension) where */
 /* >          dimension   = f2cmax(stage1,stage2) + (KD+1)*N */
 /* >                      = N*KD + N*f2cmax(KD+1,FACTOPTNB) */
 /* >                        + f2cmax(2*KD*KD, KD*NTHREADS) */
 /* >                        + (KD+1)*N */
 /* >          where KD is the blocking size of the reduction, */
 /* >          FACTOPTNB is the blocking used by the QR or LQ */
 /* >          algorithm, usually FACTOPTNB=128 is a good choice */
 /* >          NTHREADS is the number of threads used when */
 /* >          openMP compilation is enabled, otherwise =1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complexHEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  Implemented by Azzam Haidar. */
 /* > */
 /* >  All details are available on technical report, SC11, SC13 papers. */
 /* > */
 /* >  Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */
 /* >  Parallel reduction to condensed forms for symmetric eigenvalue problems */
 /* >  using aggregated fine-grained and memory-aware kernels. In Proceedings */
 /* >  of 2011 International Conference for High Performance Computing, */
 /* >  Networking, Storage and Analysis (SC '11), New York, NY, USA, */
 /* >  Article 8 , 11 pages. */
 /* >  http://doi.acm.org/10.1145/2063384.2063394 */
 /* > */
 /* >  A. Haidar, J. Kurzak, P. Luszczek, 2013. */
 /* >  An improved parallel singular value algorithm and its implementation */
 /* >  for multicore hardware, In Proceedings of 2013 International Conference */
 /* >  for High Performance Computing, Networking, Storage and Analysis (SC '13). */
 /* >  Denver, Colorado, USA, 2013. */
 /* >  Article 90, 12 pages. */
 /* >  http://doi.acm.org/10.1145/2503210.2503292 */
 /* > */
 /* >  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */
 /* >  A novel hybrid CPU-GPU generalized eigensolver for electronic structure */
 /* >  calculations based on fine-grained memory aware tasks. */
 /* >  International Journal of High Performance Computing Applications. */
 /* >  Volume 28 Issue 2, Pages 196-209, May 2014. */
 /* >  http://hpc.sagepub.com/content/28/2/196 */
 /* > */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int chetrd_2stage_(char *vect, char *uplo, integer *n, 
 	complex *a, integer *lda, real *d__, real *e, complex *tau, complex *
 	hous2, integer *lhous2, complex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1;

    /* Local variables */
    integer ldab;
    extern /* Subroutine */ int chetrd_hb2st_(char *, char *, char *, 
 	    integer *, integer *, complex *, integer *, real *, real *, 
 	    complex *, integer *, complex *, integer *, integer *);
    extern integer ilaenv2stage_(integer *, char *, char *, integer *, 
 	    integer *, integer *, integer *);
    integer lwrk, wpos;
    extern logical lsame_(char *, char *);
    integer abpos, lhmin, lwmin;
    logical wantq, upper;
    integer ib, kd;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    logical lquery;
    extern /* Subroutine */ int chetrd_he2hb_(char *, integer *, integer *, 
 	    complex *, integer *, complex *, integer *, complex *, complex *, 
 	    integer *, integer *);



 /*  -- LAPACK computational routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */

 /*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --d__;
    --e;
    --tau;
    --hous2;
    --work;

    /* Function Body */
    *info = 0;
    wantq = lsame_(vect, "V");
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1 || *lhous2 == -1;

 /*     Determine the block size, the workspace size and the hous size. */

    kd = ilaenv2stage_(&c__1, "CHETRD_2STAGE", vect, n, &c_n1, &c_n1, &c_n1);
    ib = ilaenv2stage_(&c__2, "CHETRD_2STAGE", vect, n, &kd, &c_n1, &c_n1);
    lhmin = ilaenv2stage_(&c__3, "CHETRD_2STAGE", vect, n, &kd, &ib, &c_n1);
    lwmin = ilaenv2stage_(&c__4, "CHETRD_2STAGE", vect, n, &kd, &ib, &c_n1);
 /*      WRITE(*,*),'CHETRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO, */
 /*     $            LHMIN, LWMIN */

    if (! lsame_(vect, "N")) {
 	*info = -1;
    } else if (! upper && ! lsame_(uplo, "L")) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*lhous2 < lhmin && ! lquery) {
 	*info = -10;
    } else if (*lwork < lwmin && ! lquery) {
 	*info = -12;
    }

    if (*info == 0) {
 	hous2[1].r = (real) lhmin, hous2[1].i = 0.f;
 	work[1].r = (real) lwmin, work[1].i = 0.f;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHETRD_2STAGE", &i__1, (ftnlen)13);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	work[1].r = 1.f, work[1].i = 0.f;
 	return 0;
    }

 /*     Determine pointer position */

    ldab = kd + 1;
    lwrk = *lwork - ldab * *n;
    abpos = 1;
    wpos = abpos + ldab * *n;
    chetrd_he2hb_(uplo, n, &kd, &a[a_offset], lda, &work[abpos], &ldab, &tau[
 	    1], &work[wpos], &lwrk, info);
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHETRD_HE2HB", &i__1, (ftnlen)12);
 	return 0;
    }
    chetrd_hb2st_("Y", vect, uplo, n, &kd, &work[abpos], &ldab, &d__[1], &e[
 	    1], &hous2[1], lhous2, &work[wpos], &lwrk, info);
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHETRD_HB2ST", &i__1, (ftnlen)12);
 	return 0;
    }


    hous2[1].r = (real) lhmin, hous2[1].i = 0.f;
    work[1].r = (real) lwmin, work[1].i = 0.f;
    return 0;

 /*     End of CHETRD_2STAGE */

 } /* chetrd_2stage__ */

--- a/lapack-netlib/SRC/chetrd_hb2st.c
+++ b/lapack-netlib/SRC/chetrd_hb2st.c
--- a/lapack-netlib/SRC/chetrd_he2hb.c
+++ b/lapack-netlib/SRC/chetrd_he2hb.c
@@ -0,0 +1,962 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static complex c_b1 = {0.f,0.f};
 static complex c_b2 = {1.f,0.f};
 static integer c__4 = 4;
 static integer c_n1 = -1;
 static integer c__1 = 1;
 static real c_b33 = 1.f;

 /* > \brief \b CHETRD_HE2HB */

 /*  @generated from zhetrd_he2hb.f, fortran z -> c, Wed Dec  7 08:22:40 2016 */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download CHETRD_HE2HB + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE CHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, */
 /*                              WORK, LWORK, INFO ) */

 /*       IMPLICIT NONE */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, LDAB, LWORK, N, KD */
 /*       COMPLEX            A( LDA, * ), AB( LDAB, * ), */
 /*                          TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > CHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian */
 /* > band-diagonal form AB by a unitary similarity transformation: */
 /* > Q**H * A * Q = AB. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KD */
 /* > \verbatim */
 /* >          KD is INTEGER */
 /* >          The number of superdiagonals of the reduced matrix if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'.  KD >= 0. */
 /* >          The reduced matrix is stored in the array AB. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          N-by-N upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading N-by-N lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* >          On exit, if UPLO = 'U', the diagonal and first superdiagonal */
 /* >          of A are overwritten by the corresponding elements of the */
 /* >          tridiagonal matrix T, and the elements above the first */
 /* >          superdiagonal, with the array TAU, represent the unitary */
 /* >          matrix Q as a product of elementary reflectors; if UPLO */
 /* >          = 'L', the diagonal and first subdiagonal of A are over- */
 /* >          written by the corresponding elements of the tridiagonal */
 /* >          matrix T, and the elements below the first subdiagonal, with */
 /* >          the array TAU, represent the unitary matrix Q as a product */
 /* >          of elementary reflectors. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] AB */
 /* > \verbatim */
 /* >          AB is COMPLEX array, dimension (LDAB,N) */
 /* >          On exit, the upper or lower triangle of the Hermitian band */
 /* >          matrix A, stored in the first KD+1 rows of the array.  The */
 /* >          j-th column of A is stored in the j-th column of the array AB */
 /* >          as follows: */
 /* >          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */
 /* >          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=f2cmin(n,j+kd). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KD+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is COMPLEX array, dimension (N-KD) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX array, dimension (LWORK) */
 /* >          On exit, if INFO = 0, or if LWORK=-1, */
 /* >          WORK(1) returns the size of LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK which should be calculated */
 /* >          by a workspace query. LWORK = MAX(1, LWORK_QUERY) */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* >          LWORK_QUERY = N*KD + N*f2cmax(KD,FACTOPTNB) + 2*KD*KD */
 /* >          where FACTOPTNB is the blocking used by the QR or LQ */
 /* >          algorithm, usually FACTOPTNB=128 is a good choice otherwise */
 /* >          putting LWORK=-1 will provide the size of WORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complexHEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  Implemented by Azzam Haidar. */
 /* > */
 /* >  All details are available on technical report, SC11, SC13 papers. */
 /* > */
 /* >  Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */
 /* >  Parallel reduction to condensed forms for symmetric eigenvalue problems */
 /* >  using aggregated fine-grained and memory-aware kernels. In Proceedings */
 /* >  of 2011 International Conference for High Performance Computing, */
 /* >  Networking, Storage and Analysis (SC '11), New York, NY, USA, */
 /* >  Article 8 , 11 pages. */
 /* >  http://doi.acm.org/10.1145/2063384.2063394 */
 /* > */
 /* >  A. Haidar, J. Kurzak, P. Luszczek, 2013. */
 /* >  An improved parallel singular value algorithm and its implementation */
 /* >  for multicore hardware, In Proceedings of 2013 International Conference */
 /* >  for High Performance Computing, Networking, Storage and Analysis (SC '13). */
 /* >  Denver, Colorado, USA, 2013. */
 /* >  Article 90, 12 pages. */
 /* >  http://doi.acm.org/10.1145/2503210.2503292 */
 /* > */
 /* >  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */
 /* >  A novel hybrid CPU-GPU generalized eigensolver for electronic structure */
 /* >  calculations based on fine-grained memory aware tasks. */
 /* >  International Journal of High Performance Computing Applications. */
 /* >  Volume 28 Issue 2, Pages 196-209, May 2014. */
 /* >  http://hpc.sagepub.com/content/28/2/196 */
 /* > */
 /* > \endverbatim */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  If UPLO = 'U', the matrix Q is represented as a product of elementary */
 /* >  reflectors */
 /* > */
 /* >     Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd. */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**H */
 /* > */
 /* >  where tau is a complex scalar, and v is a complex vector with */
 /* >  v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in */
 /* >  A(i,i+kd+1:n), and tau in TAU(i). */
 /* > */
 /* >  If UPLO = 'L', the matrix Q is represented as a product of elementary */
 /* >  reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = n-kd. */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**H */
 /* > */
 /* >  where tau is a complex scalar, and v is a complex vector with */
 /* >  v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in */
 /* >  A(i+kd+2:n,i), and tau in TAU(i). */
 /* > */
 /* >  The contents of A on exit are illustrated by the following examples */
 /* >  with n = 5: */
 /* > */
 /* >  if UPLO = 'U':                       if UPLO = 'L': */
 /* > */
 /* >    (  ab  ab/v1  v1      v1     v1    )              (  ab                            ) */
 /* >    (      ab     ab/v2   v2     v2    )              (  ab/v1  ab                     ) */
 /* >    (             ab      ab/v3  v3    )              (  v1     ab/v2  ab              ) */
 /* >    (                     ab     ab/v4 )              (  v1     v2     ab/v3  ab       ) */
 /* >    (                            ab    )              (  v1     v2     v3     ab/v4 ab ) */
 /* > */
 /* >  where d and e denote diagonal and off-diagonal elements of T, and vi */
 /* >  denotes an element of the vector defining H(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int chetrd_he2hb_(char *uplo, integer *n, integer *kd, 
 	complex *a, integer *lda, complex *ab, integer *ldab, complex *tau, 
 	complex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, ab_dim1, ab_offset, i__1, i__2, i__3, i__4, 
 	    i__5;
    complex q__1;

    /* Local variables */
    extern integer ilaenv2stage_(integer *, char *, char *, integer *, 
 	    integer *, integer *, integer *);
    integer tpos, wpos, s1pos, s2pos, i__, j;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
 	    integer *, complex *, complex *, integer *, complex *, integer *, 
 	    complex *, complex *, integer *), chemm_(char *, 
 	    char *, integer *, integer *, complex *, complex *, integer *, 
 	    complex *, integer *, complex *, complex *, integer *);
    extern logical lsame_(char *, char *);
    integer iinfo;
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
 	    complex *, integer *);
    integer lwmin;
    logical upper;
    extern /* Subroutine */ int cher2k_(char *, char *, integer *, integer *, 
 	    complex *, complex *, integer *, complex *, integer *, real *, 
 	    complex *, integer *);
    integer lk, pk, pn, lt;
    extern /* Subroutine */ int cgelqf_(integer *, integer *, complex *, 
 	    integer *, complex *, complex *, integer *, integer *);
    integer lw;
    extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *, 
 	    integer *, complex *, complex *, integer *, integer *), clarft_(
 	    char *, char *, integer *, integer *, complex *, integer *, 
 	    complex *, complex *, integer *), claset_(char *, 
 	    integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *, ftnlen);
    integer ls1;
    logical lquery;
    integer ls2, ldt, ldw, lds1, lds2;



 /*  -- LAPACK computational routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


 /*     Determine the minimal workspace size required */
 /*     and test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1;
    lwmin = ilaenv2stage_(&c__4, "CHETRD_HE2HB", "", n, kd, &c_n1, &c_n1);
    if (! upper && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*kd < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = 1, i__2 = *kd + 1;
 	if (*ldab < f2cmax(i__1,i__2)) {
 	    *info = -7;
 	} else if (*lwork < lwmin && ! lquery) {
 	    *info = -10;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("CHETRD_HE2HB", &i__1, (ftnlen)12);
 	return 0;
    } else if (lquery) {
 	work[1].r = (real) lwmin, work[1].i = 0.f;
 	return 0;
    }

 /*     Quick return if possible */
 /*     Copy the upper/lower portion of A into AB */

    if (*n <= *kd + 1) {
 	if (upper) {
 	    i__1 = *n;
 	    for (i__ = 1; i__ <= i__1; ++i__) {
 /* Computing MIN */
 		i__2 = *kd + 1;
 		lk = f2cmin(i__2,i__);
 		ccopy_(&lk, &a[i__ - lk + 1 + i__ * a_dim1], &c__1, &ab[*kd + 
 			1 - lk + 1 + i__ * ab_dim1], &c__1);
 /* L100: */
 	    }
 	} else {
 	    i__1 = *n;
 	    for (i__ = 1; i__ <= i__1; ++i__) {
 /* Computing MIN */
 		i__2 = *kd + 1, i__3 = *n - i__ + 1;
 		lk = f2cmin(i__2,i__3);
 		ccopy_(&lk, &a[i__ + i__ * a_dim1], &c__1, &ab[i__ * ab_dim1 
 			+ 1], &c__1);
 /* L110: */
 	    }
 	}
 	work[1].r = 1.f, work[1].i = 0.f;
 	return 0;
    }

 /*     Determine the pointer position for the workspace */

    ldt = *kd;
    lds1 = *kd;
    lt = ldt * *kd;
    lw = *n * *kd;
    ls1 = lds1 * *kd;
    ls2 = lwmin - lt - lw - ls1;
 /*      LS2 = N*MAX(KD,FACTOPTNB) */
    tpos = 1;
    wpos = tpos + lt;
    s1pos = wpos + lw;
    s2pos = s1pos + ls1;
    if (upper) {
 	ldw = *kd;
 	lds2 = *kd;
    } else {
 	ldw = *n;
 	lds2 = *n;
    }


 /*     Set the workspace of the triangular matrix T to zero once such a */
 /*     way every time T is generated the upper/lower portion will be always zero */

    claset_("A", &ldt, kd, &c_b1, &c_b1, &work[tpos], &ldt);

    if (upper) {
 	i__1 = *n - *kd;
 	i__2 = *kd;
 	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
 	    pn = *n - i__ - *kd + 1;
 /* Computing MIN */
 	    i__3 = *n - i__ - *kd + 1;
 	    pk = f2cmin(i__3,*kd);

 /*            Compute the LQ factorization of the current block */

 	    cgelqf_(kd, &pn, &a[i__ + (i__ + *kd) * a_dim1], lda, &tau[i__], &
 		    work[s2pos], &ls2, &iinfo);

 /*            Copy the upper portion of A into AB */

 	    i__3 = i__ + pk - 1;
 	    for (j = i__; j <= i__3; ++j) {
 /* Computing MIN */
 		i__4 = *kd, i__5 = *n - j;
 		lk = f2cmin(i__4,i__5) + 1;
 		i__4 = *ldab - 1;
 		ccopy_(&lk, &a[j + j * a_dim1], lda, &ab[*kd + 1 + j * 
 			ab_dim1], &i__4);
 /* L20: */
 	    }

 	    claset_("Lower", &pk, &pk, &c_b1, &c_b2, &a[i__ + (i__ + *kd) * 
 		    a_dim1], lda);

 /*            Form the matrix T */

 	    clarft_("Forward", "Rowwise", &pn, &pk, &a[i__ + (i__ + *kd) * 
 		    a_dim1], lda, &tau[i__], &work[tpos], &ldt);

 /*            Compute W: */

 	    cgemm_("Conjugate", "No transpose", &pk, &pn, &pk, &c_b2, &work[
 		    tpos], &ldt, &a[i__ + (i__ + *kd) * a_dim1], lda, &c_b1, &
 		    work[s2pos], &lds2);

 	    chemm_("Right", uplo, &pk, &pn, &c_b2, &a[i__ + *kd + (i__ + *kd) 
 		    * a_dim1], lda, &work[s2pos], &lds2, &c_b1, &work[wpos], &
 		    ldw);

 	    cgemm_("No transpose", "Conjugate", &pk, &pk, &pn, &c_b2, &work[
 		    wpos], &ldw, &work[s2pos], &lds2, &c_b1, &work[s1pos], &
 		    lds1);

 	    q__1.r = -.5f, q__1.i = 0.f;
 	    cgemm_("No transpose", "No transpose", &pk, &pn, &pk, &q__1, &
 		    work[s1pos], &lds1, &a[i__ + (i__ + *kd) * a_dim1], lda, &
 		    c_b2, &work[wpos], &ldw);


 /*            Update the unreduced submatrix A(i+kd:n,i+kd:n), using */
 /*            an update of the form:  A := A - V'*W - W'*V */

 	    q__1.r = -1.f, q__1.i = 0.f;
 	    cher2k_(uplo, "Conjugate", &pn, &pk, &q__1, &a[i__ + (i__ + *kd) *
 		     a_dim1], lda, &work[wpos], &ldw, &c_b33, &a[i__ + *kd + (
 		    i__ + *kd) * a_dim1], lda);
 /* L10: */
 	}

 /*        Copy the upper band to AB which is the band storage matrix */

 	i__2 = *n;
 	for (j = *n - *kd + 1; j <= i__2; ++j) {
 /* Computing MIN */
 	    i__1 = *kd, i__3 = *n - j;
 	    lk = f2cmin(i__1,i__3) + 1;
 	    i__1 = *ldab - 1;
 	    ccopy_(&lk, &a[j + j * a_dim1], lda, &ab[*kd + 1 + j * ab_dim1], &
 		    i__1);
 /* L30: */
 	}

    } else {

 /*         Reduce the lower triangle of A to lower band matrix */

 	i__2 = *n - *kd;
 	i__1 = *kd;
 	for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
 	    pn = *n - i__ - *kd + 1;
 /* Computing MIN */
 	    i__3 = *n - i__ - *kd + 1;
 	    pk = f2cmin(i__3,*kd);

 /*            Compute the QR factorization of the current block */

 	    cgeqrf_(&pn, kd, &a[i__ + *kd + i__ * a_dim1], lda, &tau[i__], &
 		    work[s2pos], &ls2, &iinfo);

 /*            Copy the upper portion of A into AB */

 	    i__3 = i__ + pk - 1;
 	    for (j = i__; j <= i__3; ++j) {
 /* Computing MIN */
 		i__4 = *kd, i__5 = *n - j;
 		lk = f2cmin(i__4,i__5) + 1;
 		ccopy_(&lk, &a[j + j * a_dim1], &c__1, &ab[j * ab_dim1 + 1], &
 			c__1);
 /* L50: */
 	    }

 	    claset_("Upper", &pk, &pk, &c_b1, &c_b2, &a[i__ + *kd + i__ * 
 		    a_dim1], lda);

 /*            Form the matrix T */

 	    clarft_("Forward", "Columnwise", &pn, &pk, &a[i__ + *kd + i__ * 
 		    a_dim1], lda, &tau[i__], &work[tpos], &ldt);

 /*            Compute W: */

 	    cgemm_("No transpose", "No transpose", &pn, &pk, &pk, &c_b2, &a[
 		    i__ + *kd + i__ * a_dim1], lda, &work[tpos], &ldt, &c_b1, 
 		    &work[s2pos], &lds2);

 	    chemm_("Left", uplo, &pn, &pk, &c_b2, &a[i__ + *kd + (i__ + *kd) *
 		     a_dim1], lda, &work[s2pos], &lds2, &c_b1, &work[wpos], &
 		    ldw);

 	    cgemm_("Conjugate", "No transpose", &pk, &pk, &pn, &c_b2, &work[
 		    s2pos], &lds2, &work[wpos], &ldw, &c_b1, &work[s1pos], &
 		    lds1);

 	    q__1.r = -.5f, q__1.i = 0.f;
 	    cgemm_("No transpose", "No transpose", &pn, &pk, &pk, &q__1, &a[
 		    i__ + *kd + i__ * a_dim1], lda, &work[s1pos], &lds1, &
 		    c_b2, &work[wpos], &ldw);


 /*            Update the unreduced submatrix A(i+kd:n,i+kd:n), using */
 /*            an update of the form:  A := A - V*W' - W*V' */

 	    q__1.r = -1.f, q__1.i = 0.f;
 	    cher2k_(uplo, "No transpose", &pn, &pk, &q__1, &a[i__ + *kd + i__ 
 		    * a_dim1], lda, &work[wpos], &ldw, &c_b33, &a[i__ + *kd + 
 		    (i__ + *kd) * a_dim1], lda);
 /*            ================================================================== */
 /*            RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED */
 /*             DO 45 J = I, I+PK-1 */
 /*                LK = MIN( KD, N-J ) + 1 */
 /*                CALL CCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 ) */
 /*   45        CONTINUE */
 /*            ================================================================== */
 /* L40: */
 	}

 /*        Copy the lower band to AB which is the band storage matrix */

 	i__1 = *n;
 	for (j = *n - *kd + 1; j <= i__1; ++j) {
 /* Computing MIN */
 	    i__2 = *kd, i__3 = *n - j;
 	    lk = f2cmin(i__2,i__3) + 1;
 	    ccopy_(&lk, &a[j + j * a_dim1], &c__1, &ab[j * ab_dim1 + 1], &
 		    c__1);
 /* L60: */
 	}
    }

    work[1].r = (real) lwmin, work[1].i = 0.f;
    return 0;

 /*     End of CHETRD_HE2HB */

 } /* chetrd_he2hb__ */