Add C versions as fallback

4 years ago · baab2aaeac
--- a/lapack-netlib/SRC/zgeqrt2.c
+++ b/lapack-netlib/SRC/zgeqrt2.c
@@ -0,0 +1,661 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};
 static doublecomplex c_b2 = {0.,0.};
 static integer c__1 = 1;

 /* > \brief \b ZGEQRT2 computes a QR factorization of a general real or complex matrix using the compact WY re
 presentation of Q. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGEQRT2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgeqrt2
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgeqrt2
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgeqrt2
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGEQRT2( M, N, A, LDA, T, LDT, INFO ) */

 /*       INTEGER   INFO, LDA, LDT, M, N */
 /*       COMPLEX*16   A( LDA, * ), T( LDT, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGEQRT2 computes a QR factorization of a complex M-by-N matrix A, */
 /* > using the compact WY representation of Q. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= N. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the complex M-by-N matrix A.  On exit, the elements on and */
 /* >          above the diagonal contain the N-by-N upper triangular matrix R; the */
 /* >          elements below the diagonal are the columns of V.  See below for */
 /* >          further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is COMPLEX*16 array, dimension (LDT,N) */
 /* >          The N-by-N upper triangular factor of the block reflector. */
 /* >          The elements on and above the diagonal contain the block */
 /* >          reflector T; the elements below the diagonal are not used. */
 /* >          See below for further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16GEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix V stores the elementary reflectors H(i) in the i-th column */
 /* >  below the diagonal. For example, if M=5 and N=3, the matrix V is */
 /* > */
 /* >               V = (  1       ) */
 /* >                   ( v1  1    ) */
 /* >                   ( v1 v2  1 ) */
 /* >                   ( v1 v2 v3 ) */
 /* >                   ( v1 v2 v3 ) */
 /* > */
 /* >  where the vi's represent the vectors which define H(i), which are returned */
 /* >  in the matrix A.  The 1's along the diagonal of V are not stored in A.  The */
 /* >  block reflector H is then given by */
 /* > */
 /* >               H = I - V * T * V**H */
 /* > */
 /* >  where V**H is the conjugate transpose of V. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zgeqrt2_(integer *m, integer *n, doublecomplex *a, 
 	integer *lda, doublecomplex *t, integer *ldt, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3;
    doublecomplex z__1, z__2;

    /* Local variables */
    integer i__, k;
    doublecomplex alpha;
    extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *), zgemv_(char *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *), 
 	    ztrmv_(char *, char *, char *, integer *, doublecomplex *, 
 	    integer *, doublecomplex *, integer *), 
 	    xerbla_(char *, integer *, ftnlen), zlarfg_(integer *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *);
    doublecomplex aii;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input arguments */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;

    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    } else if (*ldt < f2cmax(1,*n)) {
 	*info = -6;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGEQRT2", &i__1, (ftnlen)7);
 	return 0;
    }

    k = f2cmin(*m,*n);

    i__1 = k;
    for (i__ = 1; i__ <= i__1; ++i__) {

 /*        Generate elem. refl. H(i) to annihilate A(i+1:m,i), tau(I) -> T(I,1) */

 	i__2 = *m - i__ + 1;
 /* Computing MIN */
 	i__3 = i__ + 1;
 	zlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * a_dim1]
 		, &c__1, &t[i__ + t_dim1]);
 	if (i__ < *n) {

 /*           Apply H(i) to A(I:M,I+1:N) from the left */

 	    i__2 = i__ + i__ * a_dim1;
 	    aii.r = a[i__2].r, aii.i = a[i__2].i;
 	    i__2 = i__ + i__ * a_dim1;
 	    a[i__2].r = 1., a[i__2].i = 0.;

 /*           W(1:N-I) := A(I:M,I+1:N)^H * A(I:M,I) [W = T(:,N)] */

 	    i__2 = *m - i__ + 1;
 	    i__3 = *n - i__;
 	    zgemv_("C", &i__2, &i__3, &c_b1, &a[i__ + (i__ + 1) * a_dim1], 
 		    lda, &a[i__ + i__ * a_dim1], &c__1, &c_b2, &t[*n * t_dim1 
 		    + 1], &c__1);

 /*           A(I:M,I+1:N) = A(I:m,I+1:N) + alpha*A(I:M,I)*W(1:N-1)^H */

 	    d_cnjg(&z__2, &t[i__ + t_dim1]);
 	    z__1.r = -z__2.r, z__1.i = -z__2.i;
 	    alpha.r = z__1.r, alpha.i = z__1.i;
 	    i__2 = *m - i__ + 1;
 	    i__3 = *n - i__;
 	    zgerc_(&i__2, &i__3, &alpha, &a[i__ + i__ * a_dim1], &c__1, &t[*n 
 		    * t_dim1 + 1], &c__1, &a[i__ + (i__ + 1) * a_dim1], lda);
 	    i__2 = i__ + i__ * a_dim1;
 	    a[i__2].r = aii.r, a[i__2].i = aii.i;
 	}
    }

    i__1 = *n;
    for (i__ = 2; i__ <= i__1; ++i__) {
 	i__2 = i__ + i__ * a_dim1;
 	aii.r = a[i__2].r, aii.i = a[i__2].i;
 	i__2 = i__ + i__ * a_dim1;
 	a[i__2].r = 1., a[i__2].i = 0.;

 /*        T(1:I-1,I) := alpha * A(I:M,1:I-1)**H * A(I:M,I) */

 	i__2 = i__ + t_dim1;
 	z__1.r = -t[i__2].r, z__1.i = -t[i__2].i;
 	alpha.r = z__1.r, alpha.i = z__1.i;
 	i__2 = *m - i__ + 1;
 	i__3 = i__ - 1;
 	zgemv_("C", &i__2, &i__3, &alpha, &a[i__ + a_dim1], lda, &a[i__ + i__ 
 		* a_dim1], &c__1, &c_b2, &t[i__ * t_dim1 + 1], &c__1);
 	i__2 = i__ + i__ * a_dim1;
 	a[i__2].r = aii.r, a[i__2].i = aii.i;

 /*        T(1:I-1,I) := T(1:I-1,1:I-1) * T(1:I-1,I) */

 	i__2 = i__ - 1;
 	ztrmv_("U", "N", "N", &i__2, &t[t_offset], ldt, &t[i__ * t_dim1 + 1], 
 		&c__1);

 /*           T(I,I) = tau(I) */

 	i__2 = i__ + i__ * t_dim1;
 	i__3 = i__ + t_dim1;
 	t[i__2].r = t[i__3].r, t[i__2].i = t[i__3].i;
 	i__2 = i__ + t_dim1;
 	t[i__2].r = 0., t[i__2].i = 0.;
    }

 /*     End of ZGEQRT2 */

    return 0;
 } /* zgeqrt2_ */

--- a/lapack-netlib/SRC/zgeqrt3.c
+++ b/lapack-netlib/SRC/zgeqrt3.c
@@ -0,0 +1,693 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};
 static integer c__1 = 1;

 /* > \brief \b ZGEQRT3 recursively computes a QR factorization of a general real or complex matrix using the c
 ompact WY representation of Q. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGEQRT3 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgeqrt3
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgeqrt3
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgeqrt3
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*        SUBROUTINE ZGEQRT3( M, N, A, LDA, T, LDT, INFO ) */

 /*       INTEGER   INFO, LDA, M, N, LDT */
 /*       COMPLEX*16   A( LDA, * ), T( LDT, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGEQRT3 recursively computes a QR factorization of a complex M-by-N */
 /* > matrix A, using the compact WY representation of Q. */
 /* > */
 /* > Based on the algorithm of Elmroth and Gustavson, */
 /* > IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= N. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the complex M-by-N matrix A.  On exit, the elements on */
 /* >          and above the diagonal contain the N-by-N upper triangular matrix R; */
 /* >          the elements below the diagonal are the columns of V.  See below for */
 /* >          further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is COMPLEX*16 array, dimension (LDT,N) */
 /* >          The N-by-N upper triangular factor of the block reflector. */
 /* >          The elements on and above the diagonal contain the block */
 /* >          reflector T; the elements below the diagonal are not used. */
 /* >          See below for further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2016 */

 /* > \ingroup complex16GEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix V stores the elementary reflectors H(i) in the i-th column */
 /* >  below the diagonal. For example, if M=5 and N=3, the matrix V is */
 /* > */
 /* >               V = (  1       ) */
 /* >                   ( v1  1    ) */
 /* >                   ( v1 v2  1 ) */
 /* >                   ( v1 v2 v3 ) */
 /* >                   ( v1 v2 v3 ) */
 /* > */
 /* >  where the vi's represent the vectors which define H(i), which are returned */
 /* >  in the matrix A.  The 1's along the diagonal of V are not stored in A.  The */
 /* >  block reflector H is then given by */
 /* > */
 /* >               H = I - V * T * V**H */
 /* > */
 /* >  where V**H is the conjugate transpose of V. */
 /* > */
 /* >  For details of the algorithm, see Elmroth and Gustavson (cited above). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zgeqrt3_(integer *m, integer *n, doublecomplex *a, 
 	integer *lda, doublecomplex *t, integer *ldt, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3, i__4, i__5;
    doublecomplex z__1;

    /* Local variables */
    integer i__, j, iinfo;
    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
 	    integer *);
    integer i1, j1, n1, n2;
    extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, 
 	    integer *, integer *, doublecomplex *, doublecomplex *, integer *,
 	     doublecomplex *, integer *), 
 	    xerbla_(char *, integer *, ftnlen), zlarfg_(integer *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2016 */


 /*  ===================================================================== */


    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;

    /* Function Body */
    *info = 0;
    if (*n < 0) {
 	*info = -2;
    } else if (*m < *n) {
 	*info = -1;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    } else if (*ldt < f2cmax(1,*n)) {
 	*info = -6;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGEQRT3", &i__1, (ftnlen)7);
 	return 0;
    }

    if (*n == 1) {

 /*        Compute Householder transform when N=1 */

 	zlarfg_(m, &a[a_dim1 + 1], &a[f2cmin(2,*m) + a_dim1], &c__1, &t[t_dim1 + 
 		1]);

    } else {

 /*        Otherwise, split A into blocks... */

 	n1 = *n / 2;
 	n2 = *n - n1;
 /* Computing MIN */
 	i__1 = n1 + 1;
 	j1 = f2cmin(i__1,*n);
 /* Computing MIN */
 	i__1 = *n + 1;
 	i1 = f2cmin(i__1,*m);

 /*        Compute A(1:M,1:N1) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H */

 	zgeqrt3_(m, &n1, &a[a_offset], lda, &t[t_offset], ldt, &iinfo);

 /*        Compute A(1:M,J1:N) = Q1^H A(1:M,J1:N) [workspace: T(1:N1,J1:N)] */

 	i__1 = n2;
 	for (j = 1; j <= i__1; ++j) {
 	    i__2 = n1;
 	    for (i__ = 1; i__ <= i__2; ++i__) {
 		i__3 = i__ + (j + n1) * t_dim1;
 		i__4 = i__ + (j + n1) * a_dim1;
 		t[i__3].r = a[i__4].r, t[i__3].i = a[i__4].i;
 	    }
 	}
 	ztrmm_("L", "L", "C", "U", &n1, &n2, &c_b1, &a[a_offset], lda, &t[j1 *
 		 t_dim1 + 1], ldt)
 		;

 	i__1 = *m - n1;
 	zgemm_("C", "N", &n1, &n2, &i__1, &c_b1, &a[j1 + a_dim1], lda, &a[j1 
 		+ j1 * a_dim1], lda, &c_b1, &t[j1 * t_dim1 + 1], ldt);

 	ztrmm_("L", "U", "C", "N", &n1, &n2, &c_b1, &t[t_offset], ldt, &t[j1 *
 		 t_dim1 + 1], ldt)
 		;

 	i__1 = *m - n1;
 	z__1.r = -1., z__1.i = 0.;
 	zgemm_("N", "N", &i__1, &n2, &n1, &z__1, &a[j1 + a_dim1], lda, &t[j1 *
 		 t_dim1 + 1], ldt, &c_b1, &a[j1 + j1 * a_dim1], lda);

 	ztrmm_("L", "L", "N", "U", &n1, &n2, &c_b1, &a[a_offset], lda, &t[j1 *
 		 t_dim1 + 1], ldt)
 		;

 	i__1 = n2;
 	for (j = 1; j <= i__1; ++j) {
 	    i__2 = n1;
 	    for (i__ = 1; i__ <= i__2; ++i__) {
 		i__3 = i__ + (j + n1) * a_dim1;
 		i__4 = i__ + (j + n1) * a_dim1;
 		i__5 = i__ + (j + n1) * t_dim1;
 		z__1.r = a[i__4].r - t[i__5].r, z__1.i = a[i__4].i - t[i__5]
 			.i;
 		a[i__3].r = z__1.r, a[i__3].i = z__1.i;
 	    }
 	}

 /*        Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H */

 	i__1 = *m - n1;
 	zgeqrt3_(&i__1, &n2, &a[j1 + j1 * a_dim1], lda, &t[j1 + j1 * t_dim1], 
 		ldt, &iinfo);

 /*        Compute T3 = T(1:N1,J1:N) = -T1 Y1^H Y2 T2 */

 	i__1 = n1;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = n2;
 	    for (j = 1; j <= i__2; ++j) {
 		i__3 = i__ + (j + n1) * t_dim1;
 		d_cnjg(&z__1, &a[j + n1 + i__ * a_dim1]);
 		t[i__3].r = z__1.r, t[i__3].i = z__1.i;
 	    }
 	}

 	ztrmm_("R", "L", "N", "U", &n1, &n2, &c_b1, &a[j1 + j1 * a_dim1], lda,
 		 &t[j1 * t_dim1 + 1], ldt);

 	i__1 = *m - *n;
 	zgemm_("C", "N", &n1, &n2, &i__1, &c_b1, &a[i1 + a_dim1], lda, &a[i1 
 		+ j1 * a_dim1], lda, &c_b1, &t[j1 * t_dim1 + 1], ldt);

 	z__1.r = -1., z__1.i = 0.;
 	ztrmm_("L", "U", "N", "N", &n1, &n2, &z__1, &t[t_offset], ldt, &t[j1 *
 		 t_dim1 + 1], ldt)
 		;

 	ztrmm_("R", "U", "N", "N", &n1, &n2, &c_b1, &t[j1 + j1 * t_dim1], ldt,
 		 &t[j1 * t_dim1 + 1], ldt);

 /*        Y = (Y1,Y2); R = [ R1  A(1:N1,J1:N) ];  T = [T1 T3] */
 /*                         [  0        R2     ]       [ 0 T2] */

    }

    return 0;

 /*     End of ZGEQRT3 */

 } /* zgeqrt3_ */

--- a/lapack-netlib/SRC/zgerfs.c
+++ b/lapack-netlib/SRC/zgerfs.c
@@ -0,0 +1,914 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};
 static integer c__1 = 1;

 /* > \brief \b ZGERFS */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGERFS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgerfs.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgerfs.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgerfs.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, */
 /*                          X, LDX, FERR, BERR, WORK, RWORK, INFO ) */

 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * ) */
 /*       COMPLEX*16         A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */
 /*      $                   WORK( * ), X( LDX, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGERFS improves the computed solution to a system of linear */
 /* > equations and provides error bounds and backward error estimates for */
 /* > the solution. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          Specifies the form of the system of equations: */
 /* >          = 'N':  A * X = B     (No transpose) */
 /* >          = 'T':  A**T * X = B  (Transpose) */
 /* >          = 'C':  A**H * X = B  (Conjugate transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrices B and X.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          The original N-by-N matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AF */
 /* > \verbatim */
 /* >          AF is COMPLEX*16 array, dimension (LDAF,N) */
 /* >          The factors L and U from the factorization A = P*L*U */
 /* >          as computed by ZGETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAF */
 /* > \verbatim */
 /* >          LDAF is INTEGER */
 /* >          The leading dimension of the array AF.  LDAF >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices from ZGETRF; for 1<=i<=N, row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,NRHS) */
 /* >          The right hand side matrix B. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] X */
 /* > \verbatim */
 /* >          X is COMPLEX*16 array, dimension (LDX,NRHS) */
 /* >          On entry, the solution matrix X, as computed by ZGETRS. */
 /* >          On exit, the improved solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDX */
 /* > \verbatim */
 /* >          LDX is INTEGER */
 /* >          The leading dimension of the array X.  LDX >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] FERR */
 /* > \verbatim */
 /* >          FERR is DOUBLE PRECISION array, dimension (NRHS) */
 /* >          The estimated forward error bound for each solution vector */
 /* >          X(j) (the j-th column of the solution matrix X). */
 /* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
 /* >          is an estimated upper bound for the magnitude of the largest */
 /* >          element in (X(j) - XTRUE) divided by the magnitude of the */
 /* >          largest element in X(j).  The estimate is as reliable as */
 /* >          the estimate for RCOND, and is almost always a slight */
 /* >          overestimate of the true error. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] BERR */
 /* > \verbatim */
 /* >          BERR is DOUBLE PRECISION array, dimension (NRHS) */
 /* >          The componentwise relative backward error of each solution */
 /* >          vector X(j) (i.e., the smallest relative change in */
 /* >          any element of A or B that makes X(j) an exact solution). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /* > \par Internal Parameters: */
 /*  ========================= */
 /* > */
 /* > \verbatim */
 /* >  ITMAX is the maximum number of steps of iterative refinement. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16GEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int zgerfs_(char *trans, integer *n, integer *nrhs, 
 	doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, 
 	integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, 
 	integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work,
 	 doublereal *rwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
 	    x_offset, i__1, i__2, i__3, i__4, i__5;
    doublereal d__1, d__2, d__3, d__4;
    doublecomplex z__1;

    /* Local variables */
    integer kase;
    doublereal safe1, safe2;
    integer i__, j, k;
    doublereal s;
    extern logical lsame_(char *, char *);
    integer isave[3], count;
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *), 
 	    zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, 
 	    integer *, doublecomplex *, integer *), zlacn2_(integer *, 
 	    doublecomplex *, doublecomplex *, doublereal *, integer *, 
 	    integer *);
    extern doublereal dlamch_(char *);
    doublereal xk;
    integer nz;
    doublereal safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    logical notran;
    char transn[1], transt[1];
    doublereal lstres;
    extern /* Subroutine */ int zgetrs_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, integer *, doublecomplex *, integer *,
 	     integer *);
    doublereal eps;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    af_dim1 = *ldaf;
    af_offset = 1 + af_dim1 * 1;
    af -= af_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --rwork;

    /* Function Body */
    *info = 0;
    notran = lsame_(trans, "N");
    if (! notran && ! lsame_(trans, "T") && ! lsame_(
 	    trans, "C")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldaf < f2cmax(1,*n)) {
 	*info = -7;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -10;
    } else if (*ldx < f2cmax(1,*n)) {
 	*info = -12;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGERFS", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
 	i__1 = *nrhs;
 	for (j = 1; j <= i__1; ++j) {
 	    ferr[j] = 0.;
 	    berr[j] = 0.;
 /* L10: */
 	}
 	return 0;
    }

    if (notran) {
 	*(unsigned char *)transn = 'N';
 	*(unsigned char *)transt = 'C';
    } else {
 	*(unsigned char *)transn = 'C';
 	*(unsigned char *)transt = 'N';
    }

 /*     NZ = maximum number of nonzero elements in each row of A, plus 1 */

    nz = *n + 1;
    eps = dlamch_("Epsilon");
    safmin = dlamch_("Safe minimum");
    safe1 = nz * safmin;
    safe2 = safe1 / eps;

 /*     Do for each right hand side */

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {

 	count = 1;
 	lstres = 3.;
 L20:

 /*        Loop until stopping criterion is satisfied. */

 /*        Compute residual R = B - op(A) * X, */
 /*        where op(A) = A, A**T, or A**H, depending on TRANS. */

 	zcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[1], &c__1);
 	z__1.r = -1., z__1.i = 0.;
 	zgemv_(trans, n, n, &z__1, &a[a_offset], lda, &x[j * x_dim1 + 1], &
 		c__1, &c_b1, &work[1], &c__1);

 /*        Compute componentwise relative backward error from formula */

 /*        f2cmax(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */

 /*        where abs(Z) is the componentwise absolute value of the matrix */
 /*        or vector Z.  If the i-th component of the denominator is less */
 /*        than SAFE2, then SAFE1 is added to the i-th components of the */
 /*        numerator and denominator before dividing. */

 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    i__3 = i__ + j * b_dim1;
 	    rwork[i__] = (d__1 = b[i__3].r, abs(d__1)) + (d__2 = d_imag(&b[
 		    i__ + j * b_dim1]), abs(d__2));
 /* L30: */
 	}

 /*        Compute abs(op(A))*abs(X) + abs(B). */

 	if (notran) {
 	    i__2 = *n;
 	    for (k = 1; k <= i__2; ++k) {
 		i__3 = k + j * x_dim1;
 		xk = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[k + j *
 			 x_dim1]), abs(d__2));
 		i__3 = *n;
 		for (i__ = 1; i__ <= i__3; ++i__) {
 		    i__4 = i__ + k * a_dim1;
 		    rwork[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = 
 			    d_imag(&a[i__ + k * a_dim1]), abs(d__2))) * xk;
 /* L40: */
 		}
 /* L50: */
 	    }
 	} else {
 	    i__2 = *n;
 	    for (k = 1; k <= i__2; ++k) {
 		s = 0.;
 		i__3 = *n;
 		for (i__ = 1; i__ <= i__3; ++i__) {
 		    i__4 = i__ + k * a_dim1;
 		    i__5 = i__ + j * x_dim1;
 		    s += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[
 			    i__ + k * a_dim1]), abs(d__2))) * ((d__3 = x[i__5]
 			    .r, abs(d__3)) + (d__4 = d_imag(&x[i__ + j * 
 			    x_dim1]), abs(d__4)));
 /* L60: */
 		}
 		rwork[k] += s;
 /* L70: */
 	    }
 	}
 	s = 0.;
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    if (rwork[i__] > safe2) {
 /* Computing MAX */
 		i__3 = i__;
 		d__3 = s, d__4 = ((d__1 = work[i__3].r, abs(d__1)) + (d__2 = 
 			d_imag(&work[i__]), abs(d__2))) / rwork[i__];
 		s = f2cmax(d__3,d__4);
 	    } else {
 /* Computing MAX */
 		i__3 = i__;
 		d__3 = s, d__4 = ((d__1 = work[i__3].r, abs(d__1)) + (d__2 = 
 			d_imag(&work[i__]), abs(d__2)) + safe1) / (rwork[i__] 
 			+ safe1);
 		s = f2cmax(d__3,d__4);
 	    }
 /* L80: */
 	}
 	berr[j] = s;

 /*        Test stopping criterion. Continue iterating if */
 /*           1) The residual BERR(J) is larger than machine epsilon, and */
 /*           2) BERR(J) decreased by at least a factor of 2 during the */
 /*              last iteration, and */
 /*           3) At most ITMAX iterations tried. */

 	if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) {

 /*           Update solution and try again. */

 	    zgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[1],
 		     n, info);
 	    zaxpy_(n, &c_b1, &work[1], &c__1, &x[j * x_dim1 + 1], &c__1);
 	    lstres = berr[j];
 	    ++count;
 	    goto L20;
 	}

 /*        Bound error from formula */

 /*        norm(X - XTRUE) / norm(X) .le. FERR = */
 /*        norm( abs(inv(op(A)))* */
 /*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */

 /*        where */
 /*          norm(Z) is the magnitude of the largest component of Z */
 /*          inv(op(A)) is the inverse of op(A) */
 /*          abs(Z) is the componentwise absolute value of the matrix or */
 /*             vector Z */
 /*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
 /*          EPS is machine epsilon */

 /*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */
 /*        is incremented by SAFE1 if the i-th component of */
 /*        abs(op(A))*abs(X) + abs(B) is less than SAFE2. */

 /*        Use ZLACN2 to estimate the infinity-norm of the matrix */
 /*           inv(op(A)) * diag(W), */
 /*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */

 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    if (rwork[i__] > safe2) {
 		i__3 = i__;
 		rwork[i__] = (d__1 = work[i__3].r, abs(d__1)) + (d__2 = 
 			d_imag(&work[i__]), abs(d__2)) + nz * eps * rwork[i__]
 			;
 	    } else {
 		i__3 = i__;
 		rwork[i__] = (d__1 = work[i__3].r, abs(d__1)) + (d__2 = 
 			d_imag(&work[i__]), abs(d__2)) + nz * eps * rwork[i__]
 			 + safe1;
 	    }
 /* L90: */
 	}

 	kase = 0;
 L100:
 	zlacn2_(n, &work[*n + 1], &work[1], &ferr[j], &kase, isave);
 	if (kase != 0) {
 	    if (kase == 1) {

 /*              Multiply by diag(W)*inv(op(A)**H). */

 		zgetrs_(transt, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &
 			work[1], n, info);
 		i__2 = *n;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    i__3 = i__;
 		    i__4 = i__;
 		    i__5 = i__;
 		    z__1.r = rwork[i__4] * work[i__5].r, z__1.i = rwork[i__4] 
 			    * work[i__5].i;
 		    work[i__3].r = z__1.r, work[i__3].i = z__1.i;
 /* L110: */
 		}
 	    } else {

 /*              Multiply by inv(op(A))*diag(W). */

 		i__2 = *n;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    i__3 = i__;
 		    i__4 = i__;
 		    i__5 = i__;
 		    z__1.r = rwork[i__4] * work[i__5].r, z__1.i = rwork[i__4] 
 			    * work[i__5].i;
 		    work[i__3].r = z__1.r, work[i__3].i = z__1.i;
 /* L120: */
 		}
 		zgetrs_(transn, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &
 			work[1], n, info);
 	    }
 	    goto L100;
 	}

 /*        Normalize error. */

 	lstres = 0.;
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 /* Computing MAX */
 	    i__3 = i__ + j * x_dim1;
 	    d__3 = lstres, d__4 = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = 
 		    d_imag(&x[i__ + j * x_dim1]), abs(d__2));
 	    lstres = f2cmax(d__3,d__4);
 /* L130: */
 	}
 	if (lstres != 0.) {
 	    ferr[j] /= lstres;
 	}

 /* L140: */
    }

    return 0;

 /*     End of ZGERFS */

 } /* zgerfs_ */

--- a/lapack-netlib/SRC/zgerfsx.c
+++ b/lapack-netlib/SRC/zgerfsx.c
@@ -0,0 +1,381 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
--- a/lapack-netlib/SRC/zgerq2.c
+++ b/lapack-netlib/SRC/zgerq2.c
@@ -0,0 +1,594 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b ZGERQ2 computes the RQ factorization of a general rectangular matrix using an unblocked algorit
 hm. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGERQ2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgerq2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgerq2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgerq2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGERQ2( M, N, A, LDA, TAU, WORK, INFO ) */

 /*       INTEGER            INFO, LDA, M, N */
 /*       COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGERQ2 computes an RQ factorization of a complex m by n matrix A: */
 /* > A = R * Q. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the m by n matrix A. */
 /* >          On exit, if m <= n, the upper triangle of the subarray */
 /* >          A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; */
 /* >          if m >= n, the elements on and above the (m-n)-th subdiagonal */
 /* >          contain the m by n upper trapezoidal matrix R; the remaining */
 /* >          elements, with the array TAU, represent the unitary matrix */
 /* >          Q as a product of elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is COMPLEX*16 array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (M) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16GEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1)**H H(2)**H . . . H(k)**H, where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**H */
 /* > */
 /* >  where tau is a complex scalar, and v is a complex vector with */
 /* >  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on */
 /* >  exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zgerq2_(integer *m, integer *n, doublecomplex *a, 
 	integer *lda, doublecomplex *tau, doublecomplex *work, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;

    /* Local variables */
    integer i__, k;
    doublecomplex alpha;
    extern /* Subroutine */ int zlarf_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
 	    integer *, doublecomplex *), xerbla_(char *, integer *, ftnlen), zlarfg_(integer *, doublecomplex *, doublecomplex *, 
 	    integer *, doublecomplex *), zlacgv_(integer *, doublecomplex *, 
 	    integer *);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input arguments */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGERQ2", &i__1, (ftnlen)6);
 	return 0;
    }

    k = f2cmin(*m,*n);

    for (i__ = k; i__ >= 1; --i__) {

 /*        Generate elementary reflector H(i) to annihilate */
 /*        A(m-k+i,1:n-k+i-1) */

 	i__1 = *n - k + i__;
 	zlacgv_(&i__1, &a[*m - k + i__ + a_dim1], lda);
 	i__1 = *m - k + i__ + (*n - k + i__) * a_dim1;
 	alpha.r = a[i__1].r, alpha.i = a[i__1].i;
 	i__1 = *n - k + i__;
 	zlarfg_(&i__1, &alpha, &a[*m - k + i__ + a_dim1], lda, &tau[i__]);

 /*        Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right */

 	i__1 = *m - k + i__ + (*n - k + i__) * a_dim1;
 	a[i__1].r = 1., a[i__1].i = 0.;
 	i__1 = *m - k + i__ - 1;
 	i__2 = *n - k + i__;
 	zlarf_("Right", &i__1, &i__2, &a[*m - k + i__ + a_dim1], lda, &tau[
 		i__], &a[a_offset], lda, &work[1]);
 	i__1 = *m - k + i__ + (*n - k + i__) * a_dim1;
 	a[i__1].r = alpha.r, a[i__1].i = alpha.i;
 	i__1 = *n - k + i__ - 1;
 	zlacgv_(&i__1, &a[*m - k + i__ + a_dim1], lda);
 /* L10: */
    }
    return 0;

 /*     End of ZGERQ2 */

 } /* zgerq2_ */

--- a/lapack-netlib/SRC/zgerqf.c
+++ b/lapack-netlib/SRC/zgerqf.c
@@ -0,0 +1,713 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;

 /* > \brief \b ZGERQF */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGERQF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgerqf.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgerqf.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgerqf.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */

 /*       INTEGER            INFO, LDA, LWORK, M, N */
 /*       COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGERQF computes an RQ factorization of a complex M-by-N matrix A: */
 /* > A = R * Q. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, */
 /* >          if m <= n, the upper triangle of the subarray */
 /* >          A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; */
 /* >          if m >= n, the elements on and above the (m-n)-th subdiagonal */
 /* >          contain the M-by-N upper trapezoidal matrix R; */
 /* >          the remaining elements, with the array TAU, represent the */
 /* >          unitary matrix Q as a product of f2cmin(m,n) elementary */
 /* >          reflectors (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is COMPLEX*16 array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK.  LWORK >= f2cmax(1,M). */
 /* >          For optimum performance LWORK >= M*NB, where NB is */
 /* >          the optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16GEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1)**H H(2)**H . . . H(k)**H, where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**H */
 /* > */
 /* >  where tau is a complex scalar, and v is a complex vector with */
 /* >  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on */
 /* >  exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zgerqf_(integer *m, integer *n, doublecomplex *a, 
 	integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork,
 	 integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;

    /* Local variables */
    integer i__, k, nbmin, iinfo;
    extern /* Subroutine */ int zgerq2_(integer *, integer *, doublecomplex *,
 	     integer *, doublecomplex *, doublecomplex *, integer *);
    integer ib, nb, ki, kk, mu, nu, nx;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *);
    integer ldwork;
    extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
 	    integer *);
    integer lwkopt;
    logical lquery;
    integer iws;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input arguments */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }

    if (*info == 0) {
 	k = f2cmin(*m,*n);
 	if (k == 0) {
 	    lwkopt = 1;
 	} else {
 	    nb = ilaenv_(&c__1, "ZGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
 		    (ftnlen)1);
 	    lwkopt = *m * nb;
 	}
 	work[1].r = (doublereal) lwkopt, work[1].i = 0.;

 	if (*lwork < f2cmax(1,*m) && ! lquery) {
 	    *info = -7;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGERQF", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (k == 0) {
 	return 0;
    }

    nbmin = 2;
    nx = 1;
    iws = *m;
    if (nb > 1 && nb < k) {

 /*        Determine when to cross over from blocked to unblocked code. */

 /* Computing MAX */
 	i__1 = 0, i__2 = ilaenv_(&c__3, "ZGERQF", " ", m, n, &c_n1, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);
 	if (nx < k) {

 /*           Determine if workspace is large enough for blocked code. */

 	    ldwork = *m;
 	    iws = ldwork * nb;
 	    if (*lwork < iws) {

 /*              Not enough workspace to use optimal NB:  reduce NB and */
 /*              determine the minimum value of NB. */

 		nb = *lwork / ldwork;
 /* Computing MAX */
 		i__1 = 2, i__2 = ilaenv_(&c__2, "ZGERQF", " ", m, n, &c_n1, &
 			c_n1, (ftnlen)6, (ftnlen)1);
 		nbmin = f2cmax(i__1,i__2);
 	    }
 	}
    }

    if (nb >= nbmin && nb < k && nx < k) {

 /*        Use blocked code initially. */
 /*        The last kk rows are handled by the block method. */

 	ki = (k - nx - 1) / nb * nb;
 /* Computing MIN */
 	i__1 = k, i__2 = ki + nb;
 	kk = f2cmin(i__1,i__2);

 	i__1 = k - kk + 1;
 	i__2 = -nb;
 	for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ 
 		+= i__2) {
 /* Computing MIN */
 	    i__3 = k - i__ + 1;
 	    ib = f2cmin(i__3,nb);

 /*           Compute the RQ factorization of the current block */
 /*           A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) */

 	    i__3 = *n - k + i__ + ib - 1;
 	    zgerq2_(&ib, &i__3, &a[*m - k + i__ + a_dim1], lda, &tau[i__], &
 		    work[1], &iinfo);
 	    if (*m - k + i__ > 1) {

 /*              Form the triangular factor of the block reflector */
 /*              H = H(i+ib-1) . . . H(i+1) H(i) */

 		i__3 = *n - k + i__ + ib - 1;
 		zlarft_("Backward", "Rowwise", &i__3, &ib, &a[*m - k + i__ + 
 			a_dim1], lda, &tau[i__], &work[1], &ldwork);

 /*              Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right */

 		i__3 = *m - k + i__ - 1;
 		i__4 = *n - k + i__ + ib - 1;
 		zlarfb_("Right", "No transpose", "Backward", "Rowwise", &i__3,
 			 &i__4, &ib, &a[*m - k + i__ + a_dim1], lda, &work[1],
 			 &ldwork, &a[a_offset], lda, &work[ib + 1], &ldwork);
 	    }
 /* L10: */
 	}
 	mu = *m - k + i__ + nb - 1;
 	nu = *n - k + i__ + nb - 1;
    } else {
 	mu = *m;
 	nu = *n;
    }

 /*     Use unblocked code to factor the last or only block */

    if (mu > 0 && nu > 0) {
 	zgerq2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
    }

    work[1].r = (doublereal) iws, work[1].i = 0.;
    return 0;

 /*     End of ZGERQF */

 } /* zgerqf_ */

--- a/lapack-netlib/SRC/zgesc2.c
+++ b/lapack-netlib/SRC/zgesc2.c
@@ -0,0 +1,630 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static doublecomplex c_b13 = {1.,0.};
 static integer c_n1 = -1;

 /* > \brief \b ZGESC2 solves a system of linear equations using the LU factorization with complete pivoting co
 mputed by sgetc2. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGESC2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgesc2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgesc2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgesc2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) */

 /*       INTEGER            LDA, N */
 /*       DOUBLE PRECISION   SCALE */
 /*       INTEGER            IPIV( * ), JPIV( * ) */
 /*       COMPLEX*16         A( LDA, * ), RHS( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGESC2 solves a system of linear equations */
 /* > */
 /* >           A * X = scale* RHS */
 /* > */
 /* > with a general N-by-N matrix A using the LU factorization with */
 /* > complete pivoting computed by ZGETC2. */
 /* > */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA, N) */
 /* >          On entry, the  LU part of the factorization of the n-by-n */
 /* >          matrix A computed by ZGETC2:  A = P * L * U * Q */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1, N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] RHS */
 /* > \verbatim */
 /* >          RHS is COMPLEX*16 array, dimension N. */
 /* >          On entry, the right hand side vector b. */
 /* >          On exit, the solution vector X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N). */
 /* >          The pivot indices; for 1 <= i <= N, row i of the */
 /* >          matrix has been interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JPIV */
 /* > \verbatim */
 /* >          JPIV is INTEGER array, dimension (N). */
 /* >          The pivot indices; for 1 <= j <= N, column j of the */
 /* >          matrix has been interchanged with column JPIV(j). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] SCALE */
 /* > \verbatim */
 /* >          SCALE is DOUBLE PRECISION */
 /* >           On exit, SCALE contains the scale factor. SCALE is chosen */
 /* >           0 <= SCALE <= 1 to prevent overflow in the solution. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complex16GEauxiliary */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Bo Kagstrom and Peter Poromaa, Department of Computing Science, */
 /* >     Umea University, S-901 87 Umea, Sweden. */

 /*  ===================================================================== */
 /* Subroutine */ int zgesc2_(integer *n, doublecomplex *a, integer *lda, 
 	doublecomplex *rhs, integer *ipiv, integer *jpiv, doublereal *scale)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
    doublereal d__1;
    doublecomplex z__1, z__2, z__3;

    /* Local variables */
    doublecomplex temp;
    integer i__, j;
    extern /* Subroutine */ int zscal_(integer *, doublecomplex *, 
 	    doublecomplex *, integer *), dlabad_(doublereal *, doublereal *);
    extern doublereal dlamch_(char *);
    doublereal bignum;
    extern integer izamax_(integer *, doublecomplex *, integer *);
    doublereal smlnum;
    extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *,
 	     integer *, integer *, integer *, integer *);
    doublereal eps;


 /*  -- LAPACK auxiliary routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


 /*     Set constant to control overflow */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --rhs;
    --ipiv;
    --jpiv;

    /* Function Body */
    eps = dlamch_("P");
    smlnum = dlamch_("S") / eps;
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);

 /*     Apply permutations IPIV to RHS */

    i__1 = *n - 1;
    zlaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &ipiv[1], &c__1);

 /*     Solve for L part */

    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	i__2 = *n;
 	for (j = i__ + 1; j <= i__2; ++j) {
 	    i__3 = j;
 	    i__4 = j;
 	    i__5 = j + i__ * a_dim1;
 	    i__6 = i__;
 	    z__2.r = a[i__5].r * rhs[i__6].r - a[i__5].i * rhs[i__6].i, 
 		    z__2.i = a[i__5].r * rhs[i__6].i + a[i__5].i * rhs[i__6]
 		    .r;
 	    z__1.r = rhs[i__4].r - z__2.r, z__1.i = rhs[i__4].i - z__2.i;
 	    rhs[i__3].r = z__1.r, rhs[i__3].i = z__1.i;
 /* L10: */
 	}
 /* L20: */
    }

 /*     Solve for U part */

    *scale = 1.;

 /*     Check for scaling */

    i__ = izamax_(n, &rhs[1], &c__1);
    if (smlnum * 2. * z_abs(&rhs[i__]) > z_abs(&a[*n + *n * a_dim1])) {
 	d__1 = z_abs(&rhs[i__]);
 	z__1.r = .5 / d__1, z__1.i = 0. / d__1;
 	temp.r = z__1.r, temp.i = z__1.i;
 	zscal_(n, &temp, &rhs[1], &c__1);
 	*scale *= temp.r;
    }
    for (i__ = *n; i__ >= 1; --i__) {
 	z_div(&z__1, &c_b13, &a[i__ + i__ * a_dim1]);
 	temp.r = z__1.r, temp.i = z__1.i;
 	i__1 = i__;
 	i__2 = i__;
 	z__1.r = rhs[i__2].r * temp.r - rhs[i__2].i * temp.i, z__1.i = rhs[
 		i__2].r * temp.i + rhs[i__2].i * temp.r;
 	rhs[i__1].r = z__1.r, rhs[i__1].i = z__1.i;
 	i__1 = *n;
 	for (j = i__ + 1; j <= i__1; ++j) {
 	    i__2 = i__;
 	    i__3 = i__;
 	    i__4 = j;
 	    i__5 = i__ + j * a_dim1;
 	    z__3.r = a[i__5].r * temp.r - a[i__5].i * temp.i, z__3.i = a[i__5]
 		    .r * temp.i + a[i__5].i * temp.r;
 	    z__2.r = rhs[i__4].r * z__3.r - rhs[i__4].i * z__3.i, z__2.i = 
 		    rhs[i__4].r * z__3.i + rhs[i__4].i * z__3.r;
 	    z__1.r = rhs[i__3].r - z__2.r, z__1.i = rhs[i__3].i - z__2.i;
 	    rhs[i__2].r = z__1.r, rhs[i__2].i = z__1.i;
 /* L30: */
 	}
 /* L40: */
    }

 /*     Apply permutations JPIV to the solution (RHS) */

    i__1 = *n - 1;
    zlaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &jpiv[1], &c_n1);
    return 0;

 /*     End of ZGESC2 */

 } /* zgesc2_ */

--- a/lapack-netlib/SRC/zgesdd.c
+++ b/lapack-netlib/SRC/zgesdd.c
--- a/lapack-netlib/SRC/zgesv.c
+++ b/lapack-netlib/SRC/zgesv.c
@@ -0,0 +1,577 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief <b> ZGESV computes the solution to system of linear equations A * X = B for GE matrices (simple dr
 iver) </b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGESV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgesv.f
 "> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgesv.f
 "> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgesv.f
 "> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */

 /*       INTEGER            INFO, LDA, LDB, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGESV computes the solution to a complex system of linear equations */
 /* >    A * X = B, */
 /* > where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */
 /* > */
 /* > The LU decomposition with partial pivoting and row interchanges is */
 /* > used to factor A as */
 /* >    A = P * L * U, */
 /* > where P is a permutation matrix, L is unit lower triangular, and U is */
 /* > upper triangular.  The factored form of A is then used to solve the */
 /* > system of equations A * X = B. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of linear equations, i.e., the order of the */
 /* >          matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the N-by-N coefficient matrix A. */
 /* >          On exit, the factors L and U from the factorization */
 /* >          A = P*L*U; the unit diagonal elements of L are not stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices that define the permutation matrix P; */
 /* >          row i of the matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,NRHS) */
 /* >          On entry, the N-by-NRHS matrix of right hand side matrix B. */
 /* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization */
 /* >                has been completed, but the factor U is exactly */
 /* >                singular, so the solution could not be computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2017 */

 /* > \ingroup complex16GEsolve */

 /*  ===================================================================== */
 /* Subroutine */ int zgesv_(integer *n, integer *nrhs, doublecomplex *a, 
 	integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer *
 	info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;

    /* Local variables */
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zgetrf_(
 	    integer *, integer *, doublecomplex *, integer *, integer *, 
 	    integer *), zgetrs_(char *, integer *, integer *, doublecomplex *,
 	     integer *, integer *, doublecomplex *, integer *, integer *);


 /*  -- LAPACK driver routine (version 3.7.1) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    if (*n < 0) {
 	*info = -1;
    } else if (*nrhs < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGESV ", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Compute the LU factorization of A. */

    zgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info);
    if (*info == 0) {

 /*        Solve the system A*X = B, overwriting B with X. */

 	zgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[
 		b_offset], ldb, info);
    }
    return 0;

 /*     End of ZGESV */

 } /* zgesv_ */

--- a/lapack-netlib/SRC/zgesvd.c
+++ b/lapack-netlib/SRC/zgesvd.c
--- a/lapack-netlib/SRC/zgesvdq.c
+++ b/lapack-netlib/SRC/zgesvdq.c
--- a/lapack-netlib/SRC/zgesvdx.c
+++ b/lapack-netlib/SRC/zgesvdx.c
--- a/lapack-netlib/SRC/zgesvj.c
+++ b/lapack-netlib/SRC/zgesvj.c
--- a/lapack-netlib/SRC/zgesvx.c
+++ b/lapack-netlib/SRC/zgesvx.c
--- a/lapack-netlib/SRC/zgesvxx.c
+++ b/lapack-netlib/SRC/zgesvxx.c
--- a/lapack-netlib/SRC/zgetc2.c
+++ b/lapack-netlib/SRC/zgetc2.c
@@ -0,0 +1,655 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static doublecomplex c_b10 = {-1.,0.};

 /* > \brief \b ZGETC2 computes the LU factorization with complete pivoting of the general n-by-n matrix. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGETC2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgetc2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgetc2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgetc2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGETC2( N, A, LDA, IPIV, JPIV, INFO ) */

 /*       INTEGER            INFO, LDA, N */
 /*       INTEGER            IPIV( * ), JPIV( * ) */
 /*       COMPLEX*16         A( LDA, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGETC2 computes an LU factorization, using complete pivoting, of the */
 /* > n-by-n matrix A. The factorization has the form A = P * L * U * Q, */
 /* > where P and Q are permutation matrices, L is lower triangular with */
 /* > unit diagonal elements and U is upper triangular. */
 /* > */
 /* > This is a level 1 BLAS version of the algorithm. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA, N) */
 /* >          On entry, the n-by-n matrix to be factored. */
 /* >          On exit, the factors L and U from the factorization */
 /* >          A = P*L*U*Q; the unit diagonal elements of L are not stored. */
 /* >          If U(k, k) appears to be less than SMIN, U(k, k) is given the */
 /* >          value of SMIN, giving a nonsingular perturbed system. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1, N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N). */
 /* >          The pivot indices; for 1 <= i <= N, row i of the */
 /* >          matrix has been interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] JPIV */
 /* > \verbatim */
 /* >          JPIV is INTEGER array, dimension (N). */
 /* >          The pivot indices; for 1 <= j <= N, column j of the */
 /* >          matrix has been interchanged with column JPIV(j). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >           = 0: successful exit */
 /* >           > 0: if INFO = k, U(k, k) is likely to produce overflow if */
 /* >                one tries to solve for x in Ax = b. So U is perturbed */
 /* >                to avoid the overflow. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2016 */

 /* > \ingroup complex16GEauxiliary */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Bo Kagstrom and Peter Poromaa, Department of Computing Science, */
 /* >     Umea University, S-901 87 Umea, Sweden. */

 /*  ===================================================================== */
 /* Subroutine */ int zgetc2_(integer *n, doublecomplex *a, integer *lda, 
 	integer *ipiv, integer *jpiv, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    doublereal d__1;
    doublecomplex z__1;

    /* Local variables */
    doublereal smin, xmax;
    integer i__, j;
    extern /* Subroutine */ int zgeru_(integer *, integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *), zswap_(integer *, doublecomplex *, 
 	    integer *, doublecomplex *, integer *), dlabad_(doublereal *, 
 	    doublereal *);
    extern doublereal dlamch_(char *);
    integer ip, jp;
    doublereal bignum, smlnum, eps;
    integer ipv, jpv;


 /*  -- LAPACK auxiliary routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2016 */


 /*  ===================================================================== */


    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    --jpiv;

    /* Function Body */
    *info = 0;

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Set constants to control overflow */

    eps = dlamch_("P");
    smlnum = dlamch_("S") / eps;
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);

 /*     Handle the case N=1 by itself */

    if (*n == 1) {
 	ipiv[1] = 1;
 	jpiv[1] = 1;
 	if (z_abs(&a[a_dim1 + 1]) < smlnum) {
 	    *info = 1;
 	    i__1 = a_dim1 + 1;
 	    z__1.r = smlnum, z__1.i = 0.;
 	    a[i__1].r = z__1.r, a[i__1].i = z__1.i;
 	}
 	return 0;
    }

 /*     Factorize A using complete pivoting. */
 /*     Set pivots less than SMIN to SMIN */

    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {

 /*        Find f2cmax element in matrix A */

 	xmax = 0.;
 	i__2 = *n;
 	for (ip = i__; ip <= i__2; ++ip) {
 	    i__3 = *n;
 	    for (jp = i__; jp <= i__3; ++jp) {
 		if (z_abs(&a[ip + jp * a_dim1]) >= xmax) {
 		    xmax = z_abs(&a[ip + jp * a_dim1]);
 		    ipv = ip;
 		    jpv = jp;
 		}
 /* L10: */
 	    }
 /* L20: */
 	}
 	if (i__ == 1) {
 /* Computing MAX */
 	    d__1 = eps * xmax;
 	    smin = f2cmax(d__1,smlnum);
 	}

 /*        Swap rows */

 	if (ipv != i__) {
 	    zswap_(n, &a[ipv + a_dim1], lda, &a[i__ + a_dim1], lda);
 	}
 	ipiv[i__] = ipv;

 /*        Swap columns */

 	if (jpv != i__) {
 	    zswap_(n, &a[jpv * a_dim1 + 1], &c__1, &a[i__ * a_dim1 + 1], &
 		    c__1);
 	}
 	jpiv[i__] = jpv;

 /*        Check for singularity */

 	if (z_abs(&a[i__ + i__ * a_dim1]) < smin) {
 	    *info = i__;
 	    i__2 = i__ + i__ * a_dim1;
 	    z__1.r = smin, z__1.i = 0.;
 	    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
 	}
 	i__2 = *n;
 	for (j = i__ + 1; j <= i__2; ++j) {
 	    i__3 = j + i__ * a_dim1;
 	    z_div(&z__1, &a[j + i__ * a_dim1], &a[i__ + i__ * a_dim1]);
 	    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
 /* L30: */
 	}
 	i__2 = *n - i__;
 	i__3 = *n - i__;
 	zgeru_(&i__2, &i__3, &c_b10, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[
 		i__ + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + (i__ + 1) * 
 		a_dim1], lda);
 /* L40: */
    }

    if (z_abs(&a[*n + *n * a_dim1]) < smin) {
 	*info = *n;
 	i__1 = *n + *n * a_dim1;
 	z__1.r = smin, z__1.i = 0.;
 	a[i__1].r = z__1.r, a[i__1].i = z__1.i;
    }

 /*     Set last pivots to N */

    ipiv[*n] = *n;
    jpiv[*n] = *n;

    return 0;

 /*     End of ZGETC2 */

 } /* zgetc2_ */

--- a/lapack-netlib/SRC/zgetf2.c
+++ b/lapack-netlib/SRC/zgetf2.c
@@ -0,0 +1,624 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};
 static integer c__1 = 1;

 /* > \brief \b ZGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row
 interchanges (unblocked algorithm). */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGETF2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgetf2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgetf2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgetf2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGETF2( M, N, A, LDA, IPIV, INFO ) */

 /*       INTEGER            INFO, LDA, M, N */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX*16         A( LDA, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGETF2 computes an LU factorization of a general m-by-n matrix A */
 /* > using partial pivoting with row interchanges. */
 /* > */
 /* > The factorization has the form */
 /* >    A = P * L * U */
 /* > where P is a permutation matrix, L is lower triangular with unit */
 /* > diagonal elements (lower trapezoidal if m > n), and U is upper */
 /* > triangular (upper trapezoidal if m < n). */
 /* > */
 /* > This is the right-looking Level 2 BLAS version of the algorithm. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the m by n matrix to be factored. */
 /* >          On exit, the factors L and U from the factorization */
 /* >          A = P*L*U; the unit diagonal elements of L are not stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (f2cmin(M,N)) */
 /* >          The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -k, the k-th argument had an illegal value */
 /* >          > 0: if INFO = k, U(k,k) is exactly zero. The factorization */
 /* >               has been completed, but the factor U is exactly */
 /* >               singular, and division by zero will occur if it is used */
 /* >               to solve a system of equations. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16GEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int zgetf2_(integer *m, integer *n, doublecomplex *a, 
 	integer *lda, integer *ipiv, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    doublecomplex z__1;

    /* Local variables */
    integer i__, j;
    doublereal sfmin;
    extern /* Subroutine */ int zscal_(integer *, doublecomplex *, 
 	    doublecomplex *, integer *), zgeru_(integer *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, doublecomplex *, integer *), zswap_(integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *);
    extern doublereal dlamch_(char *);
    integer jp;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer izamax_(integer *, doublecomplex *, integer *);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;

    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGETF2", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*m == 0 || *n == 0) {
 	return 0;
    }

 /*     Compute machine safe minimum */

    sfmin = dlamch_("S");

    i__1 = f2cmin(*m,*n);
    for (j = 1; j <= i__1; ++j) {

 /*        Find pivot and test for singularity. */

 	i__2 = *m - j + 1;
 	jp = j - 1 + izamax_(&i__2, &a[j + j * a_dim1], &c__1);
 	ipiv[j] = jp;
 	i__2 = jp + j * a_dim1;
 	if (a[i__2].r != 0. || a[i__2].i != 0.) {

 /*           Apply the interchange to columns 1:N. */

 	    if (jp != j) {
 		zswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda);
 	    }

 /*           Compute elements J+1:M of J-th column. */

 	    if (j < *m) {
 		if (z_abs(&a[j + j * a_dim1]) >= sfmin) {
 		    i__2 = *m - j;
 		    z_div(&z__1, &c_b1, &a[j + j * a_dim1]);
 		    zscal_(&i__2, &z__1, &a[j + 1 + j * a_dim1], &c__1);
 		} else {
 		    i__2 = *m - j;
 		    for (i__ = 1; i__ <= i__2; ++i__) {
 			i__3 = j + i__ + j * a_dim1;
 			z_div(&z__1, &a[j + i__ + j * a_dim1], &a[j + j * 
 				a_dim1]);
 			a[i__3].r = z__1.r, a[i__3].i = z__1.i;
 /* L20: */
 		    }
 		}
 	    }

 	} else if (*info == 0) {

 	    *info = j;
 	}

 	if (j < f2cmin(*m,*n)) {

 /*           Update trailing submatrix. */

 	    i__2 = *m - j;
 	    i__3 = *n - j;
 	    z__1.r = -1., z__1.i = 0.;
 	    zgeru_(&i__2, &i__3, &z__1, &a[j + 1 + j * a_dim1], &c__1, &a[j + 
 		    (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda)
 		    ;
 	}
 /* L10: */
    }
    return 0;

 /*     End of ZGETF2 */

 } /* zgetf2_ */

--- a/lapack-netlib/SRC/zgetrf.c
+++ b/lapack-netlib/SRC/zgetrf.c
@@ -0,0 +1,645 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};
 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b ZGETRF */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGETRF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgetrf.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgetrf.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgetrf.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGETRF( M, N, A, LDA, IPIV, INFO ) */

 /*       INTEGER            INFO, LDA, M, N */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX*16         A( LDA, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGETRF computes an LU factorization of a general M-by-N matrix A */
 /* > using partial pivoting with row interchanges. */
 /* > */
 /* > The factorization has the form */
 /* >    A = P * L * U */
 /* > where P is a permutation matrix, L is lower triangular with unit */
 /* > diagonal elements (lower trapezoidal if m > n), and U is upper */
 /* > triangular (upper trapezoidal if m < n). */
 /* > */
 /* > This is the right-looking Level 3 BLAS version of the algorithm. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix to be factored. */
 /* >          On exit, the factors L and U from the factorization */
 /* >          A = P*L*U; the unit diagonal elements of L are not stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (f2cmin(M,N)) */
 /* >          The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization */
 /* >                has been completed, but the factor U is exactly */
 /* >                singular, and division by zero will occur if it is used */
 /* >                to solve a system of equations. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16GEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int zgetrf_(integer *m, integer *n, doublecomplex *a, 
 	integer *lda, integer *ipiv, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    doublecomplex z__1;

    /* Local variables */
    integer i__, j, iinfo;
    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
 	    integer *), ztrsm_(char *, char *, char *, char *,
 	     integer *, integer *, doublecomplex *, doublecomplex *, integer *
 	    , doublecomplex *, integer *);
    integer jb, nb;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *,
 	     integer *, integer *, integer *, integer *), zgetrf2_(integer *, 
 	    integer *, doublecomplex *, integer *, integer *, integer *);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;

    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGETRF", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*m == 0 || *n == 0) {
 	return 0;
    }

 /*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "ZGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
 	    1);
    if (nb <= 1 || nb >= f2cmin(*m,*n)) {

 /*        Use unblocked code. */

 	zgetrf2_(m, n, &a[a_offset], lda, &ipiv[1], info);
    } else {

 /*        Use blocked code. */

 	i__1 = f2cmin(*m,*n);
 	i__2 = nb;
 	for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
 /* Computing MIN */
 	    i__3 = f2cmin(*m,*n) - j + 1;
 	    jb = f2cmin(i__3,nb);

 /*           Factor diagonal and subdiagonal blocks and test for exact */
 /*           singularity. */

 	    i__3 = *m - j + 1;
 	    zgetrf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo);

 /*           Adjust INFO and the pivot indices. */

 	    if (*info == 0 && iinfo > 0) {
 		*info = iinfo + j - 1;
 	    }
 /* Computing MIN */
 	    i__4 = *m, i__5 = j + jb - 1;
 	    i__3 = f2cmin(i__4,i__5);
 	    for (i__ = j; i__ <= i__3; ++i__) {
 		ipiv[i__] = j - 1 + ipiv[i__];
 /* L10: */
 	    }

 /*           Apply interchanges to columns 1:J-1. */

 	    i__3 = j - 1;
 	    i__4 = j + jb - 1;
 	    zlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1);

 	    if (j + jb <= *n) {

 /*              Apply interchanges to columns J+JB:N. */

 		i__3 = *n - j - jb + 1;
 		i__4 = j + jb - 1;
 		zlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, &
 			ipiv[1], &c__1);

 /*              Compute block row of U. */

 		i__3 = *n - j - jb + 1;
 		ztrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, &
 			c_b1, &a[j + j * a_dim1], lda, &a[j + (j + jb) * 
 			a_dim1], lda);
 		if (j + jb <= *m) {

 /*                 Update trailing submatrix. */

 		    i__3 = *m - j - jb + 1;
 		    i__4 = *n - j - jb + 1;
 		    z__1.r = -1., z__1.i = 0.;
 		    zgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, 
 			    &z__1, &a[j + jb + j * a_dim1], lda, &a[j + (j + 
 			    jb) * a_dim1], lda, &c_b1, &a[j + jb + (j + jb) * 
 			    a_dim1], lda);
 		}
 	    }
 /* L20: */
 	}
    }
    return 0;

 /*     End of ZGETRF */

 } /* zgetrf_ */

--- a/lapack-netlib/SRC/zgetrf2.c
+++ b/lapack-netlib/SRC/zgetrf2.c
@@ -0,0 +1,690 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};
 static integer c__1 = 1;

 /* > \brief \b ZGETRF2 */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /*  Definition: */
 /*  =========== */

 /*        SUBROUTINE ZGETRF2( M, N, A, LDA, IPIV, INFO ) */

 /*       INTEGER            INFO, LDA, M, N */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX*16         A( LDA, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGETRF2 computes an LU factorization of a general M-by-N matrix A */
 /* > using partial pivoting with row interchanges. */
 /* > */
 /* > The factorization has the form */
 /* >    A = P * L * U */
 /* > where P is a permutation matrix, L is lower triangular with unit */
 /* > diagonal elements (lower trapezoidal if m > n), and U is upper */
 /* > triangular (upper trapezoidal if m < n). */
 /* > */
 /* > This is the recursive version of the algorithm. It divides */
 /* > the matrix into four submatrices: */
 /* > */
 /* >        [  A11 | A12  ]  where A11 is n1 by n1 and A22 is n2 by n2 */
 /* >    A = [ -----|----- ]  with n1 = f2cmin(m,n)/2 */
 /* >        [  A21 | A22  ]       n2 = n-n1 */
 /* > */
 /* >                                       [ A11 ] */
 /* > The subroutine calls itself to factor [ --- ], */
 /* >                                       [ A12 ] */
 /* >                 [ A12 ] */
 /* > do the swaps on [ --- ], solve A12, update A22, */
 /* >                 [ A22 ] */
 /* > */
 /* > then calls itself to factor A22 and do the swaps on A21. */
 /* > */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix to be factored. */
 /* >          On exit, the factors L and U from the factorization */
 /* >          A = P*L*U; the unit diagonal elements of L are not stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (f2cmin(M,N)) */
 /* >          The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization */
 /* >                has been completed, but the factor U is exactly */
 /* >                singular, and division by zero will occur if it is used */
 /* >                to solve a system of equations. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2016 */

 /* > \ingroup complex16GEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int zgetrf2_(integer *m, integer *n, doublecomplex *a, 
 	integer *lda, integer *ipiv, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    doublecomplex z__1;

    /* Local variables */
    doublecomplex temp;
    integer i__, iinfo;
    doublereal sfmin;
    extern /* Subroutine */ int zscal_(integer *, doublecomplex *, 
 	    doublecomplex *, integer *), zgemm_(char *, char *, integer *, 
 	    integer *, integer *, doublecomplex *, doublecomplex *, integer *,
 	     doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
 	    integer *);
    integer n1, n2;
    extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, 
 	    integer *, integer *, doublecomplex *, doublecomplex *, integer *,
 	     doublecomplex *, integer *);
    extern doublereal dlamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer izamax_(integer *, doublecomplex *, integer *);
    extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *,
 	     integer *, integer *, integer *, integer *);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;

    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGETRF2", &i__1, (ftnlen)7);
 	return 0;
    }

 /*     Quick return if possible */

    if (*m == 0 || *n == 0) {
 	return 0;
    }
    if (*m == 1) {

 /*        Use unblocked code for one row case */
 /*        Just need to handle IPIV and INFO */

 	ipiv[1] = 1;
 	i__1 = a_dim1 + 1;
 	if (a[i__1].r == 0. && a[i__1].i == 0.) {
 	    *info = 1;
 	}

    } else if (*n == 1) {

 /*        Use unblocked code for one column case */


 /*        Compute machine safe minimum */

 	sfmin = dlamch_("S");

 /*        Find pivot and test for singularity */

 	i__ = izamax_(m, &a[a_dim1 + 1], &c__1);
 	ipiv[1] = i__;
 	i__1 = i__ + a_dim1;
 	if (a[i__1].r != 0. || a[i__1].i != 0.) {

 /*           Apply the interchange */

 	    if (i__ != 1) {
 		i__1 = a_dim1 + 1;
 		temp.r = a[i__1].r, temp.i = a[i__1].i;
 		i__1 = a_dim1 + 1;
 		i__2 = i__ + a_dim1;
 		a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i;
 		i__1 = i__ + a_dim1;
 		a[i__1].r = temp.r, a[i__1].i = temp.i;
 	    }

 /*           Compute elements 2:M of the column */

 	    if (z_abs(&a[a_dim1 + 1]) >= sfmin) {
 		i__1 = *m - 1;
 		z_div(&z__1, &c_b1, &a[a_dim1 + 1]);
 		zscal_(&i__1, &z__1, &a[a_dim1 + 2], &c__1);
 	    } else {
 		i__1 = *m - 1;
 		for (i__ = 1; i__ <= i__1; ++i__) {
 		    i__2 = i__ + 1 + a_dim1;
 		    z_div(&z__1, &a[i__ + 1 + a_dim1], &a[a_dim1 + 1]);
 		    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
 /* L10: */
 		}
 	    }

 	} else {
 	    *info = 1;
 	}
    } else {

 /*        Use recursive code */

 	n1 = f2cmin(*m,*n) / 2;
 	n2 = *n - n1;

 /*               [ A11 ] */
 /*        Factor [ --- ] */
 /*               [ A21 ] */

 	zgetrf2_(m, &n1, &a[a_offset], lda, &ipiv[1], &iinfo);
 	if (*info == 0 && iinfo > 0) {
 	    *info = iinfo;
 	}

 /*                              [ A12 ] */
 /*        Apply interchanges to [ --- ] */
 /*                              [ A22 ] */

 	zlaswp_(&n2, &a[(n1 + 1) * a_dim1 + 1], lda, &c__1, &n1, &ipiv[1], &
 		c__1);

 /*        Solve A12 */

 	ztrsm_("L", "L", "N", "U", &n1, &n2, &c_b1, &a[a_offset], lda, &a[(n1 
 		+ 1) * a_dim1 + 1], lda);

 /*        Update A22 */

 	i__1 = *m - n1;
 	z__1.r = -1., z__1.i = 0.;
 	zgemm_("N", "N", &i__1, &n2, &n1, &z__1, &a[n1 + 1 + a_dim1], lda, &a[
 		(n1 + 1) * a_dim1 + 1], lda, &c_b1, &a[n1 + 1 + (n1 + 1) * 
 		a_dim1], lda);

 /*        Factor A22 */

 	i__1 = *m - n1;
 	zgetrf2_(&i__1, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &ipiv[n1 + 
 		1], &iinfo);

 /*        Adjust INFO and the pivot indices */

 	if (*info == 0 && iinfo > 0) {
 	    *info = iinfo + n1;
 	}
 	i__1 = f2cmin(*m,*n);
 	for (i__ = n1 + 1; i__ <= i__1; ++i__) {
 	    ipiv[i__] += n1;
 /* L20: */
 	}

 /*        Apply interchanges to A21 */

 	i__1 = n1 + 1;
 	i__2 = f2cmin(*m,*n);
 	zlaswp_(&n1, &a[a_dim1 + 1], lda, &i__1, &i__2, &ipiv[1], &c__1);

    }
    return 0;

 /*     End of ZGETRF2 */

 } /* zgetrf2_ */

--- a/lapack-netlib/SRC/zgetri.c
+++ b/lapack-netlib/SRC/zgetri.c
@@ -0,0 +1,700 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b2 = {1.,0.};
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__2 = 2;

 /* > \brief \b ZGETRI */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGETRI + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgetri.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgetri.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgetri.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) */

 /*       INTEGER            INFO, LDA, LWORK, N */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX*16         A( LDA, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGETRI computes the inverse of a matrix using the LU factorization */
 /* > computed by ZGETRF. */
 /* > */
 /* > This method inverts U and then computes inv(A) by solving the system */
 /* > inv(A)*L = inv(U) for inv(A). */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the factors L and U from the factorization */
 /* >          A = P*L*U as computed by ZGETRF. */
 /* >          On exit, if INFO = 0, the inverse of the original matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices from ZGETRF; for 1<=i<=N, row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO=0, then WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK.  LWORK >= f2cmax(1,N). */
 /* >          For optimal performance LWORK >= N*NB, where NB is */
 /* >          the optimal blocksize returned by ILAENV. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, U(i,i) is exactly zero; the matrix is */
 /* >                singular and its inverse could not be computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16GEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int zgetri_(integer *n, doublecomplex *a, integer *lda, 
 	integer *ipiv, doublecomplex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    doublecomplex z__1;

    /* Local variables */
    integer i__, j, nbmin;
    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
 	    integer *), zgemv_(char *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *), 
 	    zswap_(integer *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *), ztrsm_(char *, char *, char *, char *, integer *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *);
    integer jb, nb, jj, jp, nn;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer ldwork, lwkopt;
    logical lquery;
    extern /* Subroutine */ int ztrtri_(char *, char *, integer *, 
 	    doublecomplex *, integer *, integer *);
    integer iws;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    --work;

    /* Function Body */
    *info = 0;
    nb = ilaenv_(&c__1, "ZGETRI", " ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);
    lwkopt = *n * nb;
    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
    lquery = *lwork == -1;
    if (*n < 0) {
 	*info = -1;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -3;
    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
 	*info = -6;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGETRI", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Form inv(U).  If INFO > 0 from ZTRTRI, then U is singular, */
 /*     and the inverse is not computed. */

    ztrtri_("Upper", "Non-unit", n, &a[a_offset], lda, info);
    if (*info > 0) {
 	return 0;
    }

    nbmin = 2;
    ldwork = *n;
    if (nb > 1 && nb < *n) {
 /* Computing MAX */
 	i__1 = ldwork * nb;
 	iws = f2cmax(i__1,1);
 	if (*lwork < iws) {
 	    nb = *lwork / ldwork;
 /* Computing MAX */
 	    i__1 = 2, i__2 = ilaenv_(&c__2, "ZGETRI", " ", n, &c_n1, &c_n1, &
 		    c_n1, (ftnlen)6, (ftnlen)1);
 	    nbmin = f2cmax(i__1,i__2);
 	}
    } else {
 	iws = *n;
    }

 /*     Solve the equation inv(A)*L = inv(U) for inv(A). */

    if (nb < nbmin || nb >= *n) {

 /*        Use unblocked code. */

 	for (j = *n; j >= 1; --j) {

 /*           Copy current column of L to WORK and replace with zeros. */

 	    i__1 = *n;
 	    for (i__ = j + 1; i__ <= i__1; ++i__) {
 		i__2 = i__;
 		i__3 = i__ + j * a_dim1;
 		work[i__2].r = a[i__3].r, work[i__2].i = a[i__3].i;
 		i__2 = i__ + j * a_dim1;
 		a[i__2].r = 0., a[i__2].i = 0.;
 /* L10: */
 	    }

 /*           Compute current column of inv(A). */

 	    if (j < *n) {
 		i__1 = *n - j;
 		z__1.r = -1., z__1.i = 0.;
 		zgemv_("No transpose", n, &i__1, &z__1, &a[(j + 1) * a_dim1 + 
 			1], lda, &work[j + 1], &c__1, &c_b2, &a[j * a_dim1 + 
 			1], &c__1);
 	    }
 /* L20: */
 	}
    } else {

 /*        Use blocked code. */

 	nn = (*n - 1) / nb * nb + 1;
 	i__1 = -nb;
 	for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) {
 /* Computing MIN */
 	    i__2 = nb, i__3 = *n - j + 1;
 	    jb = f2cmin(i__2,i__3);

 /*           Copy current block column of L to WORK and replace with */
 /*           zeros. */

 	    i__2 = j + jb - 1;
 	    for (jj = j; jj <= i__2; ++jj) {
 		i__3 = *n;
 		for (i__ = jj + 1; i__ <= i__3; ++i__) {
 		    i__4 = i__ + (jj - j) * ldwork;
 		    i__5 = i__ + jj * a_dim1;
 		    work[i__4].r = a[i__5].r, work[i__4].i = a[i__5].i;
 		    i__4 = i__ + jj * a_dim1;
 		    a[i__4].r = 0., a[i__4].i = 0.;
 /* L30: */
 		}
 /* L40: */
 	    }

 /*           Compute current block column of inv(A). */

 	    if (j + jb <= *n) {
 		i__2 = *n - j - jb + 1;
 		z__1.r = -1., z__1.i = 0.;
 		zgemm_("No transpose", "No transpose", n, &jb, &i__2, &z__1, &
 			a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], &ldwork,
 			 &c_b2, &a[j * a_dim1 + 1], lda);
 	    }
 	    ztrsm_("Right", "Lower", "No transpose", "Unit", n, &jb, &c_b2, &
 		    work[j], &ldwork, &a[j * a_dim1 + 1], lda);
 /* L50: */
 	}
    }

 /*     Apply column interchanges. */

    for (j = *n - 1; j >= 1; --j) {
 	jp = ipiv[j];
 	if (jp != j) {
 	    zswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1);
 	}
 /* L60: */
    }

    work[1].r = (doublereal) iws, work[1].i = 0.;
    return 0;

 /*     End of ZGETRI */

 } /* zgetri_ */

--- a/lapack-netlib/SRC/zgetrs.c
+++ b/lapack-netlib/SRC/zgetrs.c
@@ -0,0 +1,621 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};
 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b ZGETRS */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGETRS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgetrs.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgetrs.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgetrs.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */

 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, LDA, LDB, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGETRS solves a system of linear equations */
 /* >    A * X = B,  A**T * X = B,  or  A**H * X = B */
 /* > with a general N-by-N matrix A using the LU factorization computed */
 /* > by ZGETRF. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          Specifies the form of the system of equations: */
 /* >          = 'N':  A * X = B     (No transpose) */
 /* >          = 'T':  A**T * X = B  (Transpose) */
 /* >          = 'C':  A**H * X = B  (Conjugate transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          The factors L and U from the factorization A = P*L*U */
 /* >          as computed by ZGETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices from ZGETRF; for 1<=i<=N, row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,NRHS) */
 /* >          On entry, the right hand side matrix B. */
 /* >          On exit, the solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16GEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int zgetrs_(char *trans, integer *n, integer *nrhs, 
 	doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, 
 	integer *ldb, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;

    /* Local variables */
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, 
 	    integer *, integer *, doublecomplex *, doublecomplex *, integer *,
 	     doublecomplex *, integer *), 
 	    xerbla_(char *, integer *, ftnlen);
    logical notran;
    extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *,
 	     integer *, integer *, integer *, integer *);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    notran = lsame_(trans, "N");
    if (! notran && ! lsame_(trans, "T") && ! lsame_(
 	    trans, "C")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGETRS", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
 	return 0;
    }

    if (notran) {

 /*        Solve A * X = B. */

 /*        Apply row interchanges to the right hand sides. */

 	zlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1);

 /*        Solve L*X = B, overwriting B with X. */

 	ztrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b1, &a[
 		a_offset], lda, &b[b_offset], ldb);

 /*        Solve U*X = B, overwriting B with X. */

 	ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b1, &
 		a[a_offset], lda, &b[b_offset], ldb);
    } else {

 /*        Solve A**T * X = B  or A**H * X = B. */

 /*        Solve U**T *X = B or U**H *X = B, overwriting B with X. */

 	ztrsm_("Left", "Upper", trans, "Non-unit", n, nrhs, &c_b1, &a[
 		a_offset], lda, &b[b_offset], ldb);

 /*        Solve L**T *X = B, or L**H *X = B overwriting B with X. */

 	ztrsm_("Left", "Lower", trans, "Unit", n, nrhs, &c_b1, &a[a_offset], 
 		lda, &b[b_offset], ldb);

 /*        Apply row interchanges to the solution vectors. */

 	zlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1);
    }

    return 0;

 /*     End of ZGETRS */

 } /* zgetrs_ */

--- a/lapack-netlib/SRC/zgetsls.c
+++ b/lapack-netlib/SRC/zgetsls.c
@@ -0,0 +1,944 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {0.,0.};
 static integer c_n1 = -1;
 static integer c_n2 = -2;
 static integer c__0 = 0;

 /* > \brief \b ZGETSLS */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, */
 /*     $                     WORK, LWORK, INFO ) */

 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGETSLS solves overdetermined or underdetermined complex linear systems */
 /* > involving an M-by-N matrix A, using a tall skinny QR or short wide LQ */
 /* > factorization of A.  It is assumed that A has full rank. */
 /* > */
 /* > */
 /* > */
 /* > The following options are provided: */
 /* > */
 /* > 1. If TRANS = 'N' and m >= n:  find the least squares solution of */
 /* >    an overdetermined system, i.e., solve the least squares problem */
 /* >                 minimize || B - A*X ||. */
 /* > */
 /* > 2. If TRANS = 'N' and m < n:  find the minimum norm solution of */
 /* >    an underdetermined system A * X = B. */
 /* > */
 /* > 3. If TRANS = 'C' and m >= n:  find the minimum norm solution of */
 /* >    an undetermined system A**T * X = B. */
 /* > */
 /* > 4. If TRANS = 'C' and m < n:  find the least squares solution of */
 /* >    an overdetermined system, i.e., solve the least squares problem */
 /* >                 minimize || B - A**T * X ||. */
 /* > */
 /* > Several right hand side vectors b and solution vectors x can be */
 /* > handled in a single call; they are stored as the columns of the */
 /* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
 /* > matrix X. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          = 'N': the linear system involves A; */
 /* >          = 'C': the linear system involves A**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of */
 /* >          columns of the matrices B and X. NRHS >=0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, */
 /* >          A is overwritten by details of its QR or LQ */
 /* >          factorization as returned by ZGEQR or ZGELQ. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,NRHS) */
 /* >          On entry, the matrix B of right hand side vectors, stored */
 /* >          columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */
 /* >          if TRANS = 'C'. */
 /* >          On exit, if INFO = 0, B is overwritten by the solution */
 /* >          vectors, stored columnwise: */
 /* >          if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */
 /* >          squares solution vectors. */
 /* >          if TRANS = 'N' and m < n, rows 1 to N of B contain the */
 /* >          minimum norm solution vectors; */
 /* >          if TRANS = 'C' and m >= n, rows 1 to M of B contain the */
 /* >          minimum norm solution vectors; */
 /* >          if TRANS = 'C' and m < n, rows 1 to M of B contain the */
 /* >          least squares solution vectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= MAX(1,M,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */
 /* >          or optimal, if query was assumed) LWORK. */
 /* >          See LWORK for details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If LWORK = -1 or -2, then a workspace query is assumed. */
 /* >          If LWORK = -1, the routine calculates optimal size of WORK for the */
 /* >          optimal performance and returns this value in WORK(1). */
 /* >          If LWORK = -2, the routine calculates minimal size of WORK and */
 /* >          returns this value in WORK(1). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO =  i, the i-th diagonal element of the */
 /* >                triangular factor of A is zero, so that A does not have */
 /* >                full rank; the least squares solution could not be */
 /* >                computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2017 */

 /* > \ingroup complex16GEsolve */

 /*  ===================================================================== */
 /* Subroutine */ int zgetsls_(char *trans, integer *m, integer *n, integer *
 	nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, 
 	doublecomplex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
    real r__1;
    doublereal d__1;

    /* Local variables */
    doublereal anrm, bnrm;
    logical tran;
    integer brow, tszm, tszo, info2, i__, j, iascl, ibscl;
    extern logical lsame_(char *, char *);
    integer minmn, maxmn;
    extern /* Subroutine */ int zgelq_(integer *, integer *, doublecomplex *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, integer *,
 	     integer *), zgeqr_(integer *, integer *, doublecomplex *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, integer *,
 	     integer *);
    doublecomplex workq[1];
    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
    extern doublereal dlamch_(char *);
    doublecomplex tq[5];
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    integer scllen;
    doublereal bignum;
    extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, 
 	    integer *, doublereal *);
    extern /* Subroutine */ int zlascl_(char *, integer *, integer *, 
 	    doublereal *, doublereal *, integer *, integer *, doublecomplex *,
 	     integer *, integer *), zgemlq_(char *, char *, integer *,
 	     integer *, integer *, doublecomplex *, integer *, doublecomplex *
 	    , integer *, doublecomplex *, integer *, doublecomplex *, integer 
 	    *, integer *), zlaset_(char *, integer *, integer 
 	    *, doublecomplex *, doublecomplex *, doublecomplex *, integer *), zgemqr_(char *, char *, integer *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, integer *);
    doublereal smlnum;
    integer wsizem, wsizeo;
    logical lquery;
    integer lw1, lw2;
    extern /* Subroutine */ int ztrtrs_(char *, char *, char *, integer *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, integer *,
 	     integer *);
    integer mnk;
    doublereal dum[1];
    integer lwm, lwo;


 /*  -- LAPACK driver routine (version 3.7.1) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2017 */



 /*  ===================================================================== */


 /*     Test the input arguments. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;

    /* Function Body */
    *info = 0;
    minmn = f2cmin(*m,*n);
    maxmn = f2cmax(*m,*n);
    mnk = f2cmax(minmn,*nrhs);
    tran = lsame_(trans, "C");

    lquery = *lwork == -1 || *lwork == -2;
    if (! (lsame_(trans, "N") || lsame_(trans, "C"))) {
 	*info = -1;
    } else if (*m < 0) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*nrhs < 0) {
 	*info = -4;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -6;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = f2cmax(1,*m);
 	if (*ldb < f2cmax(i__1,*n)) {
 	    *info = -8;
 	}
    }

    if (*info == 0) {

 /*     Determine the block size and minimum LWORK */

 	if (*m >= *n) {
 	    zgeqr_(m, n, &a[a_offset], lda, tq, &c_n1, workq, &c_n1, &info2);
 	    tszo = (integer) tq[0].r;
 	    lwo = (integer) workq[0].r;
 	    zgemqr_("L", trans, m, nrhs, n, &a[a_offset], lda, tq, &tszo, &b[
 		    b_offset], ldb, workq, &c_n1, &info2);
 /* Computing MAX */
 	    i__1 = lwo, i__2 = (integer) workq[0].r;
 	    lwo = f2cmax(i__1,i__2);
 	    zgeqr_(m, n, &a[a_offset], lda, tq, &c_n2, workq, &c_n2, &info2);
 	    tszm = (integer) tq[0].r;
 	    lwm = (integer) workq[0].r;
 	    zgemqr_("L", trans, m, nrhs, n, &a[a_offset], lda, tq, &tszm, &b[
 		    b_offset], ldb, workq, &c_n1, &info2);
 /* Computing MAX */
 	    i__1 = lwm, i__2 = (integer) workq[0].r;
 	    lwm = f2cmax(i__1,i__2);
 	    wsizeo = tszo + lwo;
 	    wsizem = tszm + lwm;
 	} else {
 	    zgelq_(m, n, &a[a_offset], lda, tq, &c_n1, workq, &c_n1, &info2);
 	    tszo = (integer) tq[0].r;
 	    lwo = (integer) workq[0].r;
 	    zgemlq_("L", trans, n, nrhs, m, &a[a_offset], lda, tq, &tszo, &b[
 		    b_offset], ldb, workq, &c_n1, &info2);
 /* Computing MAX */
 	    i__1 = lwo, i__2 = (integer) workq[0].r;
 	    lwo = f2cmax(i__1,i__2);
 	    zgelq_(m, n, &a[a_offset], lda, tq, &c_n2, workq, &c_n2, &info2);
 	    tszm = (integer) tq[0].r;
 	    lwm = (integer) workq[0].r;
 	    zgemlq_("L", trans, n, nrhs, m, &a[a_offset], lda, tq, &tszm, &b[
 		    b_offset], ldb, workq, &c_n1, &info2);
 /* Computing MAX */
 	    i__1 = lwm, i__2 = (integer) workq[0].r;
 	    lwm = f2cmax(i__1,i__2);
 	    wsizeo = tszo + lwo;
 	    wsizem = tszm + lwm;
 	}

 	if (*lwork < wsizem && ! lquery) {
 	    *info = -10;
 	}

    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGETSLS", &i__1, (ftnlen)7);
 	d__1 = (doublereal) wsizeo;
 	work[1].r = d__1, work[1].i = 0.;
 	return 0;
    }
    if (lquery) {
 	if (*lwork == -1) {
 	    r__1 = (real) wsizeo;
 	    work[1].r = r__1, work[1].i = 0.f;
 	}
 	if (*lwork == -2) {
 	    r__1 = (real) wsizem;
 	    work[1].r = r__1, work[1].i = 0.f;
 	}
 	return 0;
    }
    if (*lwork < wsizeo) {
 	lw1 = tszm;
 	lw2 = lwm;
    } else {
 	lw1 = tszo;
 	lw2 = lwo;
    }

 /*     Quick return if possible */

 /* Computing MIN */
    i__1 = f2cmin(*m,*n);
    if (f2cmin(i__1,*nrhs) == 0) {
 	i__1 = f2cmax(*m,*n);
 	zlaset_("FULL", &i__1, nrhs, &c_b1, &c_b1, &b[b_offset], ldb);
 	return 0;
    }

 /*     Get machine parameters */

    smlnum = dlamch_("S") / dlamch_("P");
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);

 /*     Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */

    anrm = zlange_("M", m, n, &a[a_offset], lda, dum);
    iascl = 0;
    if (anrm > 0. && anrm < smlnum) {

 /*        Scale matrix norm up to SMLNUM */

 	zlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
 		info);
 	iascl = 1;
    } else if (anrm > bignum) {

 /*        Scale matrix norm down to BIGNUM */

 	zlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
 		info);
 	iascl = 2;
    } else if (anrm == 0.) {

 /*        Matrix all zero. Return zero solution. */

 	zlaset_("F", &maxmn, nrhs, &c_b1, &c_b1, &b[b_offset], ldb)
 		;
 	goto L50;
    }

    brow = *m;
    if (tran) {
 	brow = *n;
    }
    bnrm = zlange_("M", &brow, nrhs, &b[b_offset], ldb, dum);
    ibscl = 0;
    if (bnrm > 0. && bnrm < smlnum) {

 /*        Scale matrix norm up to SMLNUM */

 	zlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], 
 		ldb, info);
 	ibscl = 1;
    } else if (bnrm > bignum) {

 /*        Scale matrix norm down to BIGNUM */

 	zlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], 
 		ldb, info);
 	ibscl = 2;
    }

    if (*m >= *n) {

 /*        compute QR factorization of A */

 	zgeqr_(m, n, &a[a_offset], lda, &work[lw2 + 1], &lw1, &work[1], &lw2, 
 		info);
 	if (! tran) {

 /*           Least-Squares Problem f2cmin || A * X - B || */

 /*           B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */

 	    zgemqr_("L", "C", m, nrhs, n, &a[a_offset], lda, &work[lw2 + 1], &
 		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);

 /*           B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */

 	    ztrtrs_("U", "N", "N", n, nrhs, &a[a_offset], lda, &b[b_offset], 
 		    ldb, info);
 	    if (*info > 0) {
 		return 0;
 	    }
 	    scllen = *n;
 	} else {

 /*           Overdetermined system of equations A**T * X = B */

 /*           B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */

 	    ztrtrs_("U", "C", "N", n, nrhs, &a[a_offset], lda, &b[b_offset], 
 		    ldb, info);

 	    if (*info > 0) {
 		return 0;
 	    }

 /*           B(N+1:M,1:NRHS) = CZERO */

 	    i__1 = *nrhs;
 	    for (j = 1; j <= i__1; ++j) {
 		i__2 = *m;
 		for (i__ = *n + 1; i__ <= i__2; ++i__) {
 		    i__3 = i__ + j * b_dim1;
 		    b[i__3].r = 0., b[i__3].i = 0.;
 /* L10: */
 		}
 /* L20: */
 	    }

 /*           B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */

 	    zgemqr_("L", "N", m, nrhs, n, &a[a_offset], lda, &work[lw2 + 1], &
 		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);

 	    scllen = *m;

 	}

    } else {

 /*        Compute LQ factorization of A */

 	zgelq_(m, n, &a[a_offset], lda, &work[lw2 + 1], &lw1, &work[1], &lw2, 
 		info);

 /*        workspace at least M, optimally M*NB. */

 	if (! tran) {

 /*           underdetermined system of equations A * X = B */

 /*           B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */

 	    ztrtrs_("L", "N", "N", m, nrhs, &a[a_offset], lda, &b[b_offset], 
 		    ldb, info);

 	    if (*info > 0) {
 		return 0;
 	    }

 /*           B(M+1:N,1:NRHS) = 0 */

 	    i__1 = *nrhs;
 	    for (j = 1; j <= i__1; ++j) {
 		i__2 = *n;
 		for (i__ = *m + 1; i__ <= i__2; ++i__) {
 		    i__3 = i__ + j * b_dim1;
 		    b[i__3].r = 0., b[i__3].i = 0.;
 /* L30: */
 		}
 /* L40: */
 	    }

 /*           B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) */

 	    zgemlq_("L", "C", n, nrhs, m, &a[a_offset], lda, &work[lw2 + 1], &
 		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);

 /*           workspace at least NRHS, optimally NRHS*NB */

 	    scllen = *n;

 	} else {

 /*           overdetermined system f2cmin || A**T * X - B || */

 /*           B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */

 	    zgemlq_("L", "N", n, nrhs, m, &a[a_offset], lda, &work[lw2 + 1], &
 		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);

 /*           workspace at least NRHS, optimally NRHS*NB */

 /*           B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */

 	    ztrtrs_("L", "C", "N", m, nrhs, &a[a_offset], lda, &b[b_offset], 
 		    ldb, info);

 	    if (*info > 0) {
 		return 0;
 	    }

 	    scllen = *m;

 	}

    }

 /*     Undo scaling */

    if (iascl == 1) {
 	zlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
    } else if (iascl == 2) {
 	zlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
    }
    if (ibscl == 1) {
 	zlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
    } else if (ibscl == 2) {
 	zlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
    }

 L50:
    d__1 = (doublereal) (tszo + lwo);
    work[1].r = d__1, work[1].i = 0.;
    return 0;

 /*     End of ZGETSLS */

 } /* zgetsls_ */

--- a/lapack-netlib/SRC/zgetsqrhrt.c
+++ b/lapack-netlib/SRC/zgetsqrhrt.c
@@ -0,0 +1,779 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b ZGETSQRHRT */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGETSQRHRT + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgetsqr
 hrt.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgetsqr
 hrt.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgetsqr
 hrt.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK, */
 /*      $                       LWORK, INFO ) */
 /*       IMPLICIT NONE */

 /*       INTEGER           INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1 */
 /*       COMPLEX*16        A( LDA, * ), T( LDT, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGETSQRHRT computes a NB2-sized column blocked QR-factorization */
 /* > of a complex M-by-N matrix A with M >= N, */
 /* > */
 /* >    A = Q * R. */
 /* > */
 /* > The routine uses internally a NB1-sized column blocked and MB1-sized */
 /* > row blocked TSQR-factorization and perfors the reconstruction */
 /* > of the Householder vectors from the TSQR output. The routine also */
 /* > converts the R_tsqr factor from the TSQR-factorization output into */
 /* > the R factor that corresponds to the Householder QR-factorization, */
 /* > */
 /* >    A = Q_tsqr * R_tsqr = Q * R. */
 /* > */
 /* > The output Q and R factors are stored in the same format as in ZGEQRT */
 /* > (Q is in blocked compact WY-representation). See the documentation */
 /* > of ZGEQRT for more details on the format. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A. M >= N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] MB1 */
 /* > \verbatim */
 /* >          MB1 is INTEGER */
 /* >          The row block size to be used in the blocked TSQR. */
 /* >          MB1 > N. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NB1 */
 /* > \verbatim */
 /* >          NB1 is INTEGER */
 /* >          The column block size to be used in the blocked TSQR. */
 /* >          N >= NB1 >= 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NB2 */
 /* > \verbatim */
 /* >          NB2 is INTEGER */
 /* >          The block size to be used in the blocked QR that is */
 /* >          output. NB2 >= 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* > */
 /* >          On entry: an M-by-N matrix A. */
 /* > */
 /* >          On exit: */
 /* >           a) the elements on and above the diagonal */
 /* >              of the array contain the N-by-N upper-triangular */
 /* >              matrix R corresponding to the Householder QR; */
 /* >           b) the elements below the diagonal represent Q by */
 /* >              the columns of blocked V (compact WY-representation). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is COMPLEX*16 array, dimension (LDT,N)) */
 /* >          The upper triangular block reflectors stored in compact form */
 /* >          as a sequence of upper triangular blocks. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= NB2. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          The dimension of the array WORK. */
 /* >          LWORK >= MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ), */
 /* >          where */
 /* >             NUM_ALL_ROW_BLOCKS = CEIL((M-N)/(MB1-N)), */
 /* >             NB1LOCAL = MIN(NB1,N). */
 /* >             LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL, */
 /* >             LW1 = NB1LOCAL * N, */
 /* >             LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ), */
 /* >          If LWORK = -1, then a workspace query is assumed. */
 /* >          The routine only calculates the optimal size of the WORK */
 /* >          array, returns this value as the first entry of the WORK */
 /* >          array, and no error message related to LWORK is issued */
 /* >          by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \ingroup comlpex16OTHERcomputational */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* > November 2020, Igor Kozachenko, */
 /* >                Computer Science Division, */
 /* >                University of California, Berkeley */
 /* > */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zgetsqrhrt_(integer *m, integer *n, integer *mb1, 
 	integer *nb1, integer *nb2, doublecomplex *a, integer *lda, 
 	doublecomplex *t, integer *ldt, doublecomplex *work, integer *lwork, 
 	integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3, i__4;
    doublereal d__1, d__2, d__3;
    doublecomplex z__1, z__2;

    /* Local variables */
    integer ldwt, lworkopt, i__, j, iinfo;
    extern /* Subroutine */ int zungtsqr_row_(integer *, integer *, integer *
 	    , integer *, doublecomplex *, integer *, doublecomplex *, integer 
 	    *, doublecomplex *, integer *, integer *), zcopy_(integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *), 
 	    zunhr_col_(integer *, integer *, integer *, doublecomplex *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, integer *)
 	    , xerbla_(char *, integer *, ftnlen);
    logical lquery;
    integer lw1, lw2, num_all_row_blocks__, lwt, nb1local, nb2local;
    extern /* Subroutine */ int zlatsqr_(integer *, integer *, integer *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, integer *,
 	     doublecomplex *, integer *, integer *);


 /*  -- LAPACK computational routine -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */


 /*  ===================================================================== */


 /*     Test the input arguments */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    --work;

    /* Function Body */
    *info = 0;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0 || *m < *n) {
 	*info = -2;
    } else if (*mb1 <= *n) {
 	*info = -3;
    } else if (*nb1 < 1) {
 	*info = -4;
    } else if (*nb2 < 1) {
 	*info = -5;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -7;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = 1, i__2 = f2cmin(*nb2,*n);
 	if (*ldt < f2cmax(i__1,i__2)) {
 	    *info = -9;
 	} else {

 /*        Test the input LWORK for the dimension of the array WORK. */
 /*        This workspace is used to store array: */
 /*        a) Matrix T and WORK for ZLATSQR; */
 /*        b) N-by-N upper-triangular factor R_tsqr; */
 /*        c) Matrix T and array WORK for ZUNGTSQR_ROW; */
 /*        d) Diagonal D for ZUNHR_COL. */

 	    if (*lwork < *n * *n + 1 && ! lquery) {
 		*info = -11;
 	    } else {

 /*           Set block size for column blocks */

 		nb1local = f2cmin(*nb1,*n);

 /* Computing MAX */
 		d__3 = (doublereal) (*m - *n) / (doublereal) (*mb1 - *n) + 
 			.5f;
 		d__1 = 1., d__2 = d_int(&d__3);
 		num_all_row_blocks__ = (integer) f2cmax(d__1,d__2);

 /*           Length and leading dimension of WORK array to place */
 /*           T array in TSQR. */

 		lwt = num_all_row_blocks__ * *n * nb1local;
 		ldwt = nb1local;

 /*           Length of TSQR work array */

 		lw1 = nb1local * *n;

 /*           Length of ZUNGTSQR_ROW work array. */

 /* Computing MAX */
 		i__1 = nb1local, i__2 = *n - nb1local;
 		lw2 = nb1local * f2cmax(i__1,i__2);

 /* Computing MAX */
 /* Computing MAX */
 		i__3 = lwt + *n * *n + lw2, i__4 = lwt + *n * *n + *n;
 		i__1 = lwt + lw1, i__2 = f2cmax(i__3,i__4);
 		lworkopt = f2cmax(i__1,i__2);

 		if (*lwork < f2cmax(1,lworkopt) && ! lquery) {
 		    *info = -11;
 		}

 	    }
 	}
    }

 /*     Handle error in the input parameters and return workspace query. */

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGETSQRHRT", &i__1, (ftnlen)10);
 	return 0;
    } else if (lquery) {
 	z__1.r = (doublereal) lworkopt, z__1.i = 0.;
 	work[1].r = z__1.r, work[1].i = z__1.i;
 	return 0;
    }

 /*     Quick return if possible */

    if (f2cmin(*m,*n) == 0) {
 	z__1.r = (doublereal) lworkopt, z__1.i = 0.;
 	work[1].r = z__1.r, work[1].i = z__1.i;
 	return 0;
    }

    nb2local = f2cmin(*nb2,*n);


 /*     (1) Perform TSQR-factorization of the M-by-N matrix A. */

    zlatsqr_(m, n, mb1, &nb1local, &a[a_offset], lda, &work[1], &ldwt, &work[
 	    lwt + 1], &lw1, &iinfo);

 /*     (2) Copy the factor R_tsqr stored in the upper-triangular part */
 /*         of A into the square matrix in the work array */
 /*         WORK(LWT+1:LWT+N*N) column-by-column. */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 	zcopy_(&j, &a[j * a_dim1 + 1], &c__1, &work[lwt + *n * (j - 1) + 1], &
 		c__1);
    }

 /*     (3) Generate a M-by-N matrix Q with orthonormal columns from */
 /*     the result stored below the diagonal in the array A in place. */

    zungtsqr_row_(m, n, mb1, &nb1local, &a[a_offset], lda, &work[1], &ldwt, &
 	    work[lwt + *n * *n + 1], &lw2, &iinfo);

 /*     (4) Perform the reconstruction of Householder vectors from */
 /*     the matrix Q (stored in A) in place. */

    zunhr_col_(m, n, &nb2local, &a[a_offset], lda, &t[t_offset], ldt, &work[
 	    lwt + *n * *n + 1], &iinfo);

 /*     (5) Copy the factor R_tsqr stored in the square matrix in the */
 /*     work array WORK(LWT+1:LWT+N*N) into the upper-triangular */
 /*     part of A. */

 /*     (6) Compute from R_tsqr the factor R_hr corresponding to */
 /*     the reconstructed Householder vectors, i.e. R_hr = S * R_tsqr. */
 /*     This multiplication by the sign matrix S on the left means */
 /*     changing the sign of I-th row of the matrix R_tsqr according */
 /*     to sign of the I-th diagonal element DIAG(I) of the matrix S. */
 /*     DIAG is stored in WORK( LWT+N*N+1 ) from the ZUNHR_COL output. */

 /*     (5) and (6) can be combined in a single loop, so the rows in A */
 /*     are accessed only once. */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	i__2 = lwt + *n * *n + i__;
 	z__1.r = -1., z__1.i = 0.;
 	if (work[i__2].r == z__1.r && work[i__2].i == z__1.i) {
 	    i__2 = *n;
 	    for (j = i__; j <= i__2; ++j) {
 		i__3 = i__ + j * a_dim1;
 		z__2.r = -1., z__2.i = 0.;
 		i__4 = lwt + *n * (j - 1) + i__;
 		z__1.r = z__2.r * work[i__4].r - z__2.i * work[i__4].i, 
 			z__1.i = z__2.r * work[i__4].i + z__2.i * work[i__4]
 			.r;
 		a[i__3].r = z__1.r, a[i__3].i = z__1.i;
 	    }
 	} else {
 	    i__2 = *n - i__ + 1;
 	    zcopy_(&i__2, &work[lwt + *n * (i__ - 1) + i__], n, &a[i__ + i__ *
 		     a_dim1], lda);
 	}
    }

    z__1.r = (doublereal) lworkopt, z__1.i = 0.;
    work[1].r = z__1.r, work[1].i = z__1.i;
    return 0;

 /*     End of ZGETSQRHRT */

 } /* zgetsqrhrt_ */

--- a/lapack-netlib/SRC/zggbak.c
+++ b/lapack-netlib/SRC/zggbak.c
@@ -0,0 +1,720 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b ZGGBAK */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGGBAK + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zggbak.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zggbak.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zggbak.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, */
 /*                          LDV, INFO ) */

 /*       CHARACTER          JOB, SIDE */
 /*       INTEGER            IHI, ILO, INFO, LDV, M, N */
 /*       DOUBLE PRECISION   LSCALE( * ), RSCALE( * ) */
 /*       COMPLEX*16         V( LDV, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGGBAK forms the right or left eigenvectors of a complex generalized */
 /* > eigenvalue problem A*x = lambda*B*x, by backward transformation on */
 /* > the computed eigenvectors of the balanced pair of matrices output by */
 /* > ZGGBAL. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOB */
 /* > \verbatim */
 /* >          JOB is CHARACTER*1 */
 /* >          Specifies the type of backward transformation required: */
 /* >          = 'N':  do nothing, return immediately; */
 /* >          = 'P':  do backward transformation for permutation only; */
 /* >          = 'S':  do backward transformation for scaling only; */
 /* >          = 'B':  do backward transformations for both permutation and */
 /* >                  scaling. */
 /* >          JOB must be the same as the argument JOB supplied to ZGGBAL. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] SIDE */
 /* > \verbatim */
 /* >          SIDE is CHARACTER*1 */
 /* >          = 'R':  V contains right eigenvectors; */
 /* >          = 'L':  V contains left eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of rows of the matrix V.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ILO */
 /* > \verbatim */
 /* >          ILO is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IHI */
 /* > \verbatim */
 /* >          IHI is INTEGER */
 /* >          The integers ILO and IHI determined by ZGGBAL. */
 /* >          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LSCALE */
 /* > \verbatim */
 /* >          LSCALE is DOUBLE PRECISION array, dimension (N) */
 /* >          Details of the permutations and/or scaling factors applied */
 /* >          to the left side of A and B, as returned by ZGGBAL. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] RSCALE */
 /* > \verbatim */
 /* >          RSCALE is DOUBLE PRECISION array, dimension (N) */
 /* >          Details of the permutations and/or scaling factors applied */
 /* >          to the right side of A and B, as returned by ZGGBAL. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of columns of the matrix V.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] V */
 /* > \verbatim */
 /* >          V is COMPLEX*16 array, dimension (LDV,M) */
 /* >          On entry, the matrix of right or left eigenvectors to be */
 /* >          transformed, as returned by ZTGEVC. */
 /* >          On exit, V is overwritten by the transformed eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDV */
 /* > \verbatim */
 /* >          LDV is INTEGER */
 /* >          The leading dimension of the matrix V. LDV >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16GBcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  See R.C. Ward, Balancing the generalized eigenvalue problem, */
 /* >                 SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zggbak_(char *job, char *side, integer *n, integer *ilo, 
 	integer *ihi, doublereal *lscale, doublereal *rscale, integer *m, 
 	doublecomplex *v, integer *ldv, integer *info)
 {
    /* System generated locals */
    integer v_dim1, v_offset, i__1;

    /* Local variables */
    integer i__, k;
    extern logical lsame_(char *, char *);
    logical leftv;
    extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *), xerbla_(char *, integer *, ftnlen), 
 	    zdscal_(integer *, doublereal *, doublecomplex *, integer *);
    logical rightv;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    --lscale;
    --rscale;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1 * 1;
    v -= v_offset;

    /* Function Body */
    rightv = lsame_(side, "R");
    leftv = lsame_(side, "L");

    *info = 0;
    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") 
 	    && ! lsame_(job, "B")) {
 	*info = -1;
    } else if (! rightv && ! leftv) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*ilo < 1) {
 	*info = -4;
    } else if (*n == 0 && *ihi == 0 && *ilo != 1) {
 	*info = -4;
    } else if (*n > 0 && (*ihi < *ilo || *ihi > f2cmax(1,*n))) {
 	*info = -5;
    } else if (*n == 0 && *ilo == 1 && *ihi != 0) {
 	*info = -5;
    } else if (*m < 0) {
 	*info = -8;
    } else if (*ldv < f2cmax(1,*n)) {
 	*info = -10;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGGBAK", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }
    if (*m == 0) {
 	return 0;
    }
    if (lsame_(job, "N")) {
 	return 0;
    }

    if (*ilo == *ihi) {
 	goto L30;
    }

 /*     Backward balance */

    if (lsame_(job, "S") || lsame_(job, "B")) {

 /*        Backward transformation on right eigenvectors */

 	if (rightv) {
 	    i__1 = *ihi;
 	    for (i__ = *ilo; i__ <= i__1; ++i__) {
 		zdscal_(m, &rscale[i__], &v[i__ + v_dim1], ldv);
 /* L10: */
 	    }
 	}

 /*        Backward transformation on left eigenvectors */

 	if (leftv) {
 	    i__1 = *ihi;
 	    for (i__ = *ilo; i__ <= i__1; ++i__) {
 		zdscal_(m, &lscale[i__], &v[i__ + v_dim1], ldv);
 /* L20: */
 	    }
 	}
    }

 /*     Backward permutation */

 L30:
    if (lsame_(job, "P") || lsame_(job, "B")) {

 /*        Backward permutation on right eigenvectors */

 	if (rightv) {
 	    if (*ilo == 1) {
 		goto L50;
 	    }
 	    for (i__ = *ilo - 1; i__ >= 1; --i__) {
 		k = (integer) rscale[i__];
 		if (k == i__) {
 		    goto L40;
 		}
 		zswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
 L40:
 		;
 	    }

 L50:
 	    if (*ihi == *n) {
 		goto L70;
 	    }
 	    i__1 = *n;
 	    for (i__ = *ihi + 1; i__ <= i__1; ++i__) {
 		k = (integer) rscale[i__];
 		if (k == i__) {
 		    goto L60;
 		}
 		zswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
 L60:
 		;
 	    }
 	}

 /*        Backward permutation on left eigenvectors */

 L70:
 	if (leftv) {
 	    if (*ilo == 1) {
 		goto L90;
 	    }
 	    for (i__ = *ilo - 1; i__ >= 1; --i__) {
 		k = (integer) lscale[i__];
 		if (k == i__) {
 		    goto L80;
 		}
 		zswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
 L80:
 		;
 	    }

 L90:
 	    if (*ihi == *n) {
 		goto L110;
 	    }
 	    i__1 = *n;
 	    for (i__ = *ihi + 1; i__ <= i__1; ++i__) {
 		k = (integer) lscale[i__];
 		if (k == i__) {
 		    goto L100;
 		}
 		zswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
 L100:
 		;
 	    }
 	}
    }

 L110:

    return 0;

 /*     End of ZGGBAK */

 } /* zggbak_ */

--- a/lapack-netlib/SRC/zggbal.c
+++ b/lapack-netlib/SRC/zggbal.c
--- a/lapack-netlib/SRC/zgges.c
+++ b/lapack-netlib/SRC/zgges.c
--- a/lapack-netlib/SRC/zgges3.c
+++ b/lapack-netlib/SRC/zgges3.c
--- a/lapack-netlib/SRC/zggesx.c
+++ b/lapack-netlib/SRC/zggesx.c
--- a/lapack-netlib/SRC/zggev.c
+++ b/lapack-netlib/SRC/zggev.c
--- a/lapack-netlib/SRC/zggev3.c
+++ b/lapack-netlib/SRC/zggev3.c
--- a/lapack-netlib/SRC/zggevx.c
+++ b/lapack-netlib/SRC/zggevx.c
--- a/lapack-netlib/SRC/zggglm.c
+++ b/lapack-netlib/SRC/zggglm.c
@@ -0,0 +1,801 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b2 = {1.,0.};
 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b ZGGGLM */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGGGLM + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zggglm.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zggglm.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zggglm.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, */
 /*                          INFO ) */

 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, P */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), */
 /*      $                   X( * ), Y( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGGGLM solves a general Gauss-Markov linear model (GLM) problem: */
 /* > */
 /* >         minimize || y ||_2   subject to   d = A*x + B*y */
 /* >             x */
 /* > */
 /* > where A is an N-by-M matrix, B is an N-by-P matrix, and d is a */
 /* > given N-vector. It is assumed that M <= N <= M+P, and */
 /* > */
 /* >            rank(A) = M    and    rank( A B ) = N. */
 /* > */
 /* > Under these assumptions, the constrained equation is always */
 /* > consistent, and there is a unique solution x and a minimal 2-norm */
 /* > solution y, which is obtained using a generalized QR factorization */
 /* > of the matrices (A, B) given by */
 /* > */
 /* >    A = Q*(R),   B = Q*T*Z. */
 /* >          (0) */
 /* > */
 /* > In particular, if matrix B is square nonsingular, then the problem */
 /* > GLM is equivalent to the following weighted linear least squares */
 /* > problem */
 /* > */
 /* >              minimize || inv(B)*(d-A*x) ||_2 */
 /* >                  x */
 /* > */
 /* > where inv(B) denotes the inverse of B. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of rows of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of columns of the matrix A.  0 <= M <= N. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] P */
 /* > \verbatim */
 /* >          P is INTEGER */
 /* >          The number of columns of the matrix B.  P >= N-M. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,M) */
 /* >          On entry, the N-by-M matrix A. */
 /* >          On exit, the upper triangular part of the array A contains */
 /* >          the M-by-M upper triangular matrix R. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,P) */
 /* >          On entry, the N-by-P matrix B. */
 /* >          On exit, if N <= P, the upper triangle of the subarray */
 /* >          B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */
 /* >          if N > P, the elements on and above the (N-P)th subdiagonal */
 /* >          contain the N-by-P upper trapezoidal matrix T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] D */
 /* > \verbatim */
 /* >          D is COMPLEX*16 array, dimension (N) */
 /* >          On entry, D is the left hand side of the GLM equation. */
 /* >          On exit, D is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] X */
 /* > \verbatim */
 /* >          X is COMPLEX*16 array, dimension (M) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Y */
 /* > \verbatim */
 /* >          Y is COMPLEX*16 array, dimension (P) */
 /* > */
 /* >          On exit, X and Y are the solutions of the GLM problem. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. LWORK >= f2cmax(1,N+M+P). */
 /* >          For optimum performance, LWORK >= M+f2cmin(N,P)+f2cmax(N,P)*NB, */
 /* >          where NB is an upper bound for the optimal blocksizes for */
 /* >          ZGEQRF, ZGERQF, ZUNMQR and ZUNMRQ. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          = 1:  the upper triangular factor R associated with A in the */
 /* >                generalized QR factorization of the pair (A, B) is */
 /* >                singular, so that rank(A) < M; the least squares */
 /* >                solution could not be computed. */
 /* >          = 2:  the bottom (N-M) by (N-M) part of the upper trapezoidal */
 /* >                factor T associated with B in the generalized QR */
 /* >                factorization of the pair (A, B) is singular, so that */
 /* >                rank( A B ) < N; the least squares solution could not */
 /* >                be computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16OTHEReigen */

 /*  ===================================================================== */
 /* Subroutine */ int zggglm_(integer *n, integer *m, integer *p, 
 	doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, 
 	doublecomplex *d__, doublecomplex *x, doublecomplex *y, doublecomplex 
 	*work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
    doublecomplex z__1;

    /* Local variables */
    integer lopt, i__;
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *), 
 	    zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *);
    integer nb, np;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int zggqrf_(integer *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *, integer *)
 	    ;
    integer lwkmin, nb1, nb2, nb3, nb4, lwkopt;
    logical lquery;
    extern /* Subroutine */ int zunmqr_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmrq_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, integer *), ztrtrs_(char *, char *, char *, integer *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, integer *,
 	     integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  =================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --d__;
    --x;
    --y;
    --work;

    /* Function Body */
    *info = 0;
    np = f2cmin(*n,*p);
    lquery = *lwork == -1;
    if (*n < 0) {
 	*info = -1;
    } else if (*m < 0 || *m > *n) {
 	*info = -2;
    } else if (*p < 0 || *p < *n - *m) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -7;
    }

 /*     Calculate workspace */

    if (*info == 0) {
 	if (*n == 0) {
 	    lwkmin = 1;
 	    lwkopt = 1;
 	} else {
 	    nb1 = ilaenv_(&c__1, "ZGEQRF", " ", n, m, &c_n1, &c_n1, (ftnlen)6,
 		     (ftnlen)1);
 	    nb2 = ilaenv_(&c__1, "ZGERQF", " ", n, m, &c_n1, &c_n1, (ftnlen)6,
 		     (ftnlen)1);
 	    nb3 = ilaenv_(&c__1, "ZUNMQR", " ", n, m, p, &c_n1, (ftnlen)6, (
 		    ftnlen)1);
 	    nb4 = ilaenv_(&c__1, "ZUNMRQ", " ", n, m, p, &c_n1, (ftnlen)6, (
 		    ftnlen)1);
 /* Computing MAX */
 	    i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3);
 	    nb = f2cmax(i__1,nb4);
 	    lwkmin = *m + *n + *p;
 	    lwkopt = *m + np + f2cmax(*n,*p) * nb;
 	}
 	work[1].r = (doublereal) lwkopt, work[1].i = 0.;

 	if (*lwork < lwkmin && ! lquery) {
 	    *info = -12;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGGGLM", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	i__1 = *m;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = i__;
 	    x[i__2].r = 0., x[i__2].i = 0.;
 	}
 	i__1 = *p;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = i__;
 	    y[i__2].r = 0., y[i__2].i = 0.;
 	}
 	return 0;
    }

 /*     Compute the GQR factorization of matrices A and B: */

 /*          Q**H*A = ( R11 ) M,    Q**H*B*Z**H = ( T11   T12 ) M */
 /*                   (  0  ) N-M                 (  0    T22 ) N-M */
 /*                      M                         M+P-N  N-M */

 /*     where R11 and T22 are upper triangular, and Q and Z are */
 /*     unitary. */

    i__1 = *lwork - *m - np;
    zggqrf_(n, m, p, &a[a_offset], lda, &work[1], &b[b_offset], ldb, &work[*m 
 	    + 1], &work[*m + np + 1], &i__1, info);
    i__1 = *m + np + 1;
    lopt = (integer) work[i__1].r;

 /*     Update left-hand-side vector d = Q**H*d = ( d1 ) M */
 /*                                               ( d2 ) N-M */

    i__1 = f2cmax(1,*n);
    i__2 = *lwork - *m - np;
    zunmqr_("Left", "Conjugate transpose", n, &c__1, m, &a[a_offset], lda, &
 	    work[1], &d__[1], &i__1, &work[*m + np + 1], &i__2, info);
 /* Computing MAX */
    i__3 = *m + np + 1;
    i__1 = lopt, i__2 = (integer) work[i__3].r;
    lopt = f2cmax(i__1,i__2);

 /*     Solve T22*y2 = d2 for y2 */

    if (*n > *m) {
 	i__1 = *n - *m;
 	i__2 = *n - *m;
 	ztrtrs_("Upper", "No transpose", "Non unit", &i__1, &c__1, &b[*m + 1 
 		+ (*m + *p - *n + 1) * b_dim1], ldb, &d__[*m + 1], &i__2, 
 		info);

 	if (*info > 0) {
 	    *info = 1;
 	    return 0;
 	}

 	i__1 = *n - *m;
 	zcopy_(&i__1, &d__[*m + 1], &c__1, &y[*m + *p - *n + 1], &c__1);
    }

 /*     Set y1 = 0 */

    i__1 = *m + *p - *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	i__2 = i__;
 	y[i__2].r = 0., y[i__2].i = 0.;
 /* L10: */
    }

 /*     Update d1 = d1 - T12*y2 */

    i__1 = *n - *m;
    z__1.r = -1., z__1.i = 0.;
    zgemv_("No transpose", m, &i__1, &z__1, &b[(*m + *p - *n + 1) * b_dim1 + 
 	    1], ldb, &y[*m + *p - *n + 1], &c__1, &c_b2, &d__[1], &c__1);

 /*     Solve triangular system: R11*x = d1 */

    if (*m > 0) {
 	ztrtrs_("Upper", "No Transpose", "Non unit", m, &c__1, &a[a_offset], 
 		lda, &d__[1], m, info);

 	if (*info > 0) {
 	    *info = 2;
 	    return 0;
 	}

 /*        Copy D to X */

 	zcopy_(m, &d__[1], &c__1, &x[1], &c__1);
    }

 /*     Backward transformation y = Z**H *y */

 /* Computing MAX */
    i__1 = 1, i__2 = *n - *p + 1;
    i__3 = f2cmax(1,*p);
    i__4 = *lwork - *m - np;
    zunmrq_("Left", "Conjugate transpose", p, &c__1, &np, &b[f2cmax(i__1,i__2) + 
 	    b_dim1], ldb, &work[*m + 1], &y[1], &i__3, &work[*m + np + 1], &
 	    i__4, info);
 /* Computing MAX */
    i__4 = *m + np + 1;
    i__2 = lopt, i__3 = (integer) work[i__4].r;
    i__1 = *m + np + f2cmax(i__2,i__3);
    work[1].r = (doublereal) i__1, work[1].i = 0.;

    return 0;

 /*     End of ZGGGLM */

 } /* zggglm_ */

--- a/lapack-netlib/SRC/zgghd3.c
+++ b/lapack-netlib/SRC/zgghd3.c
--- a/lapack-netlib/SRC/zgghrd.c
+++ b/lapack-netlib/SRC/zgghrd.c
@@ -0,0 +1,788 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};
 static doublecomplex c_b2 = {0.,0.};
 static integer c__1 = 1;

 /* > \brief \b ZGGHRD */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGGHRD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgghrd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgghrd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgghrd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, */
 /*                          LDQ, Z, LDZ, INFO ) */

 /*       CHARACTER          COMPQ, COMPZ */
 /*       INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */
 /*      $                   Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGGHRD reduces a pair of complex matrices (A,B) to generalized upper */
 /* > Hessenberg form using unitary transformations, where A is a */
 /* > general matrix and B is upper triangular.  The form of the */
 /* > generalized eigenvalue problem is */
 /* >    A*x = lambda*B*x, */
 /* > and B is typically made upper triangular by computing its QR */
 /* > factorization and moving the unitary matrix Q to the left side */
 /* > of the equation. */
 /* > */
 /* > This subroutine simultaneously reduces A to a Hessenberg matrix H: */
 /* >    Q**H*A*Z = H */
 /* > and transforms B to another upper triangular matrix T: */
 /* >    Q**H*B*Z = T */
 /* > in order to reduce the problem to its standard form */
 /* >    H*y = lambda*T*y */
 /* > where y = Z**H*x. */
 /* > */
 /* > The unitary matrices Q and Z are determined as products of Givens */
 /* > rotations.  They may either be formed explicitly, or they may be */
 /* > postmultiplied into input matrices Q1 and Z1, so that */
 /* >      Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H */
 /* >      Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H */
 /* > If Q1 is the unitary matrix from the QR factorization of B in the */
 /* > original equation A*x = lambda*B*x, then ZGGHRD reduces the original */
 /* > problem to generalized Hessenberg form. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] COMPQ */
 /* > \verbatim */
 /* >          COMPQ is CHARACTER*1 */
 /* >          = 'N': do not compute Q; */
 /* >          = 'I': Q is initialized to the unit matrix, and the */
 /* >                 unitary matrix Q is returned; */
 /* >          = 'V': Q must contain a unitary matrix Q1 on entry, */
 /* >                 and the product Q1*Q is returned. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] COMPZ */
 /* > \verbatim */
 /* >          COMPZ is CHARACTER*1 */
 /* >          = 'N': do not compute Z; */
 /* >          = 'I': Z is initialized to the unit matrix, and the */
 /* >                 unitary matrix Z is returned; */
 /* >          = 'V': Z must contain a unitary matrix Z1 on entry, */
 /* >                 and the product Z1*Z is returned. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ILO */
 /* > \verbatim */
 /* >          ILO is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IHI */
 /* > \verbatim */
 /* >          IHI is INTEGER */
 /* > */
 /* >          ILO and IHI mark the rows and columns of A which are to be */
 /* >          reduced.  It is assumed that A is already upper triangular */
 /* >          in rows and columns 1:ILO-1 and IHI+1:N.  ILO and IHI are */
 /* >          normally set by a previous call to ZGGBAL; otherwise they */
 /* >          should be set to 1 and N respectively. */
 /* >          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA, N) */
 /* >          On entry, the N-by-N general matrix to be reduced. */
 /* >          On exit, the upper triangle and the first subdiagonal of A */
 /* >          are overwritten with the upper Hessenberg matrix H, and the */
 /* >          rest is set to zero. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB, N) */
 /* >          On entry, the N-by-N upper triangular matrix B. */
 /* >          On exit, the upper triangular matrix T = Q**H B Z.  The */
 /* >          elements below the diagonal are set to zero. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] Q */
 /* > \verbatim */
 /* >          Q is COMPLEX*16 array, dimension (LDQ, N) */
 /* >          On entry, if COMPQ = 'V', the unitary matrix Q1, typically */
 /* >          from the QR factorization of B. */
 /* >          On exit, if COMPQ='I', the unitary matrix Q, and if */
 /* >          COMPQ = 'V', the product Q1*Q. */
 /* >          Not referenced if COMPQ='N'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDQ */
 /* > \verbatim */
 /* >          LDQ is INTEGER */
 /* >          The leading dimension of the array Q. */
 /* >          LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX*16 array, dimension (LDZ, N) */
 /* >          On entry, if COMPZ = 'V', the unitary matrix Z1. */
 /* >          On exit, if COMPZ='I', the unitary matrix Z, and if */
 /* >          COMPZ = 'V', the product Z1*Z. */
 /* >          Not referenced if COMPZ='N'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z. */
 /* >          LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16OTHERcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  This routine reduces A to Hessenberg and B to triangular form by */
 /* >  an unblocked reduction, as described in _Matrix_Computations_, */
 /* >  by Golub and van Loan (Johns Hopkins Press). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zgghrd_(char *compq, char *compz, integer *n, integer *
 	ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *b, 
 	integer *ldb, doublecomplex *q, integer *ldq, doublecomplex *z__, 
 	integer *ldz, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, 
 	    z_offset, i__1, i__2, i__3;
    doublecomplex z__1;

    /* Local variables */
    integer jcol, jrow;
    extern /* Subroutine */ int zrot_(integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublereal *, doublecomplex *);
    doublereal c__;
    doublecomplex s;
    extern logical lsame_(char *, char *);
    doublecomplex ctemp;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    integer icompq, icompz;
    extern /* Subroutine */ int zlaset_(char *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, doublecomplex *, integer *), zlartg_(doublecomplex *, doublecomplex *, doublereal *, 
 	    doublecomplex *, doublecomplex *);
    logical ilq, ilz;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Decode COMPQ */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;

    /* Function Body */
    if (lsame_(compq, "N")) {
 	ilq = FALSE_;
 	icompq = 1;
    } else if (lsame_(compq, "V")) {
 	ilq = TRUE_;
 	icompq = 2;
    } else if (lsame_(compq, "I")) {
 	ilq = TRUE_;
 	icompq = 3;
    } else {
 	icompq = 0;
    }

 /*     Decode COMPZ */

    if (lsame_(compz, "N")) {
 	ilz = FALSE_;
 	icompz = 1;
    } else if (lsame_(compz, "V")) {
 	ilz = TRUE_;
 	icompz = 2;
    } else if (lsame_(compz, "I")) {
 	ilz = TRUE_;
 	icompz = 3;
    } else {
 	icompz = 0;
    }

 /*     Test the input parameters. */

    *info = 0;
    if (icompq <= 0) {
 	*info = -1;
    } else if (icompz <= 0) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*ilo < 1) {
 	*info = -4;
    } else if (*ihi > *n || *ihi < *ilo - 1) {
 	*info = -5;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -7;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -9;
    } else if (ilq && *ldq < *n || *ldq < 1) {
 	*info = -11;
    } else if (ilz && *ldz < *n || *ldz < 1) {
 	*info = -13;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGGHRD", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Initialize Q and Z if desired. */

    if (icompq == 3) {
 	zlaset_("Full", n, n, &c_b2, &c_b1, &q[q_offset], ldq);
    }
    if (icompz == 3) {
 	zlaset_("Full", n, n, &c_b2, &c_b1, &z__[z_offset], ldz);
    }

 /*     Quick return if possible */

    if (*n <= 1) {
 	return 0;
    }

 /*     Zero out lower triangle of B */

    i__1 = *n - 1;
    for (jcol = 1; jcol <= i__1; ++jcol) {
 	i__2 = *n;
 	for (jrow = jcol + 1; jrow <= i__2; ++jrow) {
 	    i__3 = jrow + jcol * b_dim1;
 	    b[i__3].r = 0., b[i__3].i = 0.;
 /* L10: */
 	}
 /* L20: */
    }

 /*     Reduce A and B */

    i__1 = *ihi - 2;
    for (jcol = *ilo; jcol <= i__1; ++jcol) {

 	i__2 = jcol + 2;
 	for (jrow = *ihi; jrow >= i__2; --jrow) {

 /*           Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) */

 	    i__3 = jrow - 1 + jcol * a_dim1;
 	    ctemp.r = a[i__3].r, ctemp.i = a[i__3].i;
 	    zlartg_(&ctemp, &a[jrow + jcol * a_dim1], &c__, &s, &a[jrow - 1 + 
 		    jcol * a_dim1]);
 	    i__3 = jrow + jcol * a_dim1;
 	    a[i__3].r = 0., a[i__3].i = 0.;
 	    i__3 = *n - jcol;
 	    zrot_(&i__3, &a[jrow - 1 + (jcol + 1) * a_dim1], lda, &a[jrow + (
 		    jcol + 1) * a_dim1], lda, &c__, &s);
 	    i__3 = *n + 2 - jrow;
 	    zrot_(&i__3, &b[jrow - 1 + (jrow - 1) * b_dim1], ldb, &b[jrow + (
 		    jrow - 1) * b_dim1], ldb, &c__, &s);
 	    if (ilq) {
 		d_cnjg(&z__1, &s);
 		zrot_(n, &q[(jrow - 1) * q_dim1 + 1], &c__1, &q[jrow * q_dim1 
 			+ 1], &c__1, &c__, &z__1);
 	    }

 /*           Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) */

 	    i__3 = jrow + jrow * b_dim1;
 	    ctemp.r = b[i__3].r, ctemp.i = b[i__3].i;
 	    zlartg_(&ctemp, &b[jrow + (jrow - 1) * b_dim1], &c__, &s, &b[jrow 
 		    + jrow * b_dim1]);
 	    i__3 = jrow + (jrow - 1) * b_dim1;
 	    b[i__3].r = 0., b[i__3].i = 0.;
 	    zrot_(ihi, &a[jrow * a_dim1 + 1], &c__1, &a[(jrow - 1) * a_dim1 + 
 		    1], &c__1, &c__, &s);
 	    i__3 = jrow - 1;
 	    zrot_(&i__3, &b[jrow * b_dim1 + 1], &c__1, &b[(jrow - 1) * b_dim1 
 		    + 1], &c__1, &c__, &s);
 	    if (ilz) {
 		zrot_(n, &z__[jrow * z_dim1 + 1], &c__1, &z__[(jrow - 1) * 
 			z_dim1 + 1], &c__1, &c__, &s);
 	    }
 /* L30: */
 	}
 /* L40: */
    }

    return 0;

 /*     End of ZGGHRD */

 } /* zgghrd_ */

--- a/lapack-netlib/SRC/zgglse.c
+++ b/lapack-netlib/SRC/zgglse.c
@@ -0,0 +1,798 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};
 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief <b> ZGGLSE solves overdetermined or underdetermined systems for OTHER matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGGLSE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgglse.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgglse.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgglse.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, */
 /*                          INFO ) */

 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, P */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ), C( * ), D( * ), */
 /*      $                   WORK( * ), X( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGGLSE solves the linear equality-constrained least squares (LSE) */
 /* > problem: */
 /* > */
 /* >         minimize || c - A*x ||_2   subject to   B*x = d */
 /* > */
 /* > where A is an M-by-N matrix, B is a P-by-N matrix, c is a given */
 /* > M-vector, and d is a given P-vector. It is assumed that */
 /* > P <= N <= M+P, and */
 /* > */
 /* >          rank(B) = P and  rank( (A) ) = N. */
 /* >                               ( (B) ) */
 /* > */
 /* > These conditions ensure that the LSE problem has a unique solution, */
 /* > which is obtained using a generalized RQ factorization of the */
 /* > matrices (B, A) given by */
 /* > */
 /* >    B = (0 R)*Q,   A = Z*T*Q. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrices A and B. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] P */
 /* > \verbatim */
 /* >          P is INTEGER */
 /* >          The number of rows of the matrix B. 0 <= P <= N <= M+P. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the f2cmin(M,N)-by-N upper trapezoidal matrix T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,N) */
 /* >          On entry, the P-by-N matrix B. */
 /* >          On exit, the upper triangle of the subarray B(1:P,N-P+1:N) */
 /* >          contains the P-by-P upper triangular matrix R. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= f2cmax(1,P). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] C */
 /* > \verbatim */
 /* >          C is COMPLEX*16 array, dimension (M) */
 /* >          On entry, C contains the right hand side vector for the */
 /* >          least squares part of the LSE problem. */
 /* >          On exit, the residual sum of squares for the solution */
 /* >          is given by the sum of squares of elements N-P+1 to M of */
 /* >          vector C. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] D */
 /* > \verbatim */
 /* >          D is COMPLEX*16 array, dimension (P) */
 /* >          On entry, D contains the right hand side vector for the */
 /* >          constrained equation. */
 /* >          On exit, D is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] X */
 /* > \verbatim */
 /* >          X is COMPLEX*16 array, dimension (N) */
 /* >          On exit, X is the solution of the LSE problem. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. LWORK >= f2cmax(1,M+N+P). */
 /* >          For optimum performance LWORK >= P+f2cmin(M,N)+f2cmax(M,N)*NB, */
 /* >          where NB is an upper bound for the optimal blocksizes for */
 /* >          ZGEQRF, CGERQF, ZUNMQR and CUNMRQ. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          = 1:  the upper triangular factor R associated with B in the */
 /* >                generalized RQ factorization of the pair (B, A) is */
 /* >                singular, so that rank(B) < P; the least squares */
 /* >                solution could not be computed. */
 /* >          = 2:  the (N-P) by (N-P) part of the upper trapezoidal factor */
 /* >                T associated with A in the generalized RQ factorization */
 /* >                of the pair (B, A) is singular, so that */
 /* >                rank( (A) ) < N; the least squares solution could not */
 /* >                    ( (B) ) */
 /* >                be computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16OTHERsolve */

 /*  ===================================================================== */
 /* Subroutine */ int zgglse_(integer *m, integer *n, integer *p, 
 	doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, 
 	doublecomplex *c__, doublecomplex *d__, doublecomplex *x, 
 	doublecomplex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
    doublecomplex z__1;

    /* Local variables */
    integer lopt;
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *), 
 	    zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, 
 	    integer *, doublecomplex *, integer *), ztrmv_(char *, char *, 
 	    char *, integer *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *);
    integer nb, mn, nr;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int zggrqf_(integer *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *, integer *)
 	    ;
    integer lwkmin, nb1, nb2, nb3, nb4, lwkopt;
    logical lquery;
    extern /* Subroutine */ int zunmqr_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmrq_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, integer *), ztrtrs_(char *, char *, char *, integer *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, integer *,
 	     integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --c__;
    --d__;
    --x;
    --work;

    /* Function Body */
    *info = 0;
    mn = f2cmin(*m,*n);
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*p < 0 || *p > *n || *p < *n - *m) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*p)) {
 	*info = -7;
    }

 /*     Calculate workspace */

    if (*info == 0) {
 	if (*n == 0) {
 	    lwkmin = 1;
 	    lwkopt = 1;
 	} else {
 	    nb1 = ilaenv_(&c__1, "ZGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6,
 		     (ftnlen)1);
 	    nb2 = ilaenv_(&c__1, "ZGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6,
 		     (ftnlen)1);
 	    nb3 = ilaenv_(&c__1, "ZUNMQR", " ", m, n, p, &c_n1, (ftnlen)6, (
 		    ftnlen)1);
 	    nb4 = ilaenv_(&c__1, "ZUNMRQ", " ", m, n, p, &c_n1, (ftnlen)6, (
 		    ftnlen)1);
 /* Computing MAX */
 	    i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3);
 	    nb = f2cmax(i__1,nb4);
 	    lwkmin = *m + *n + *p;
 	    lwkopt = *p + mn + f2cmax(*m,*n) * nb;
 	}
 	work[1].r = (doublereal) lwkopt, work[1].i = 0.;

 	if (*lwork < lwkmin && ! lquery) {
 	    *info = -12;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGGLSE", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Compute the GRQ factorization of matrices B and A: */

 /*            B*Q**H = (  0  T12 ) P   Z**H*A*Q**H = ( R11 R12 ) N-P */
 /*                        N-P  P                     (  0  R22 ) M+P-N */
 /*                                                      N-P  P */

 /*     where T12 and R11 are upper triangular, and Q and Z are */
 /*     unitary. */

    i__1 = *lwork - *p - mn;
    zggrqf_(p, m, n, &b[b_offset], ldb, &work[1], &a[a_offset], lda, &work[*p 
 	    + 1], &work[*p + mn + 1], &i__1, info);
    i__1 = *p + mn + 1;
    lopt = (integer) work[i__1].r;

 /*     Update c = Z**H *c = ( c1 ) N-P */
 /*                       ( c2 ) M+P-N */

    i__1 = f2cmax(1,*m);
    i__2 = *lwork - *p - mn;
    zunmqr_("Left", "Conjugate Transpose", m, &c__1, &mn, &a[a_offset], lda, &
 	    work[*p + 1], &c__[1], &i__1, &work[*p + mn + 1], &i__2, info);
 /* Computing MAX */
    i__3 = *p + mn + 1;
    i__1 = lopt, i__2 = (integer) work[i__3].r;
    lopt = f2cmax(i__1,i__2);

 /*     Solve T12*x2 = d for x2 */

    if (*p > 0) {
 	ztrtrs_("Upper", "No transpose", "Non-unit", p, &c__1, &b[(*n - *p + 
 		1) * b_dim1 + 1], ldb, &d__[1], p, info);

 	if (*info > 0) {
 	    *info = 1;
 	    return 0;
 	}

 /*        Put the solution in X */

 	zcopy_(p, &d__[1], &c__1, &x[*n - *p + 1], &c__1);

 /*        Update c1 */

 	i__1 = *n - *p;
 	z__1.r = -1., z__1.i = 0.;
 	zgemv_("No transpose", &i__1, p, &z__1, &a[(*n - *p + 1) * a_dim1 + 1]
 		, lda, &d__[1], &c__1, &c_b1, &c__[1], &c__1);
    }

 /*     Solve R11*x1 = c1 for x1 */

    if (*n > *p) {
 	i__1 = *n - *p;
 	i__2 = *n - *p;
 	ztrtrs_("Upper", "No transpose", "Non-unit", &i__1, &c__1, &a[
 		a_offset], lda, &c__[1], &i__2, info);

 	if (*info > 0) {
 	    *info = 2;
 	    return 0;
 	}

 /*        Put the solutions in X */

 	i__1 = *n - *p;
 	zcopy_(&i__1, &c__[1], &c__1, &x[1], &c__1);
    }

 /*     Compute the residual vector: */

    if (*m < *n) {
 	nr = *m + *p - *n;
 	if (nr > 0) {
 	    i__1 = *n - *m;
 	    z__1.r = -1., z__1.i = 0.;
 	    zgemv_("No transpose", &nr, &i__1, &z__1, &a[*n - *p + 1 + (*m + 
 		    1) * a_dim1], lda, &d__[nr + 1], &c__1, &c_b1, &c__[*n - *
 		    p + 1], &c__1);
 	}
    } else {
 	nr = *p;
    }
    if (nr > 0) {
 	ztrmv_("Upper", "No transpose", "Non unit", &nr, &a[*n - *p + 1 + (*n 
 		- *p + 1) * a_dim1], lda, &d__[1], &c__1);
 	z__1.r = -1., z__1.i = 0.;
 	zaxpy_(&nr, &z__1, &d__[1], &c__1, &c__[*n - *p + 1], &c__1);
    }

 /*     Backward transformation x = Q**H*x */

    i__1 = *lwork - *p - mn;
    zunmrq_("Left", "Conjugate Transpose", n, &c__1, p, &b[b_offset], ldb, &
 	    work[1], &x[1], n, &work[*p + mn + 1], &i__1, info);
 /* Computing MAX */
    i__4 = *p + mn + 1;
    i__2 = lopt, i__3 = (integer) work[i__4].r;
    i__1 = *p + mn + f2cmax(i__2,i__3);
    work[1].r = (doublereal) i__1, work[1].i = 0.;

    return 0;

 /*     End of ZGGLSE */

 } /* zgglse_ */

--- a/lapack-netlib/SRC/zggqrf.c
+++ b/lapack-netlib/SRC/zggqrf.c
@@ -0,0 +1,720 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b ZGGQRF */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGGQRF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zggqrf.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zggqrf.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zggqrf.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, */
 /*                          LWORK, INFO ) */

 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, P */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), */
 /*      $                   WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGGQRF computes a generalized QR factorization of an N-by-M matrix A */
 /* > and an N-by-P matrix B: */
 /* > */
 /* >             A = Q*R,        B = Q*T*Z, */
 /* > */
 /* > where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix, */
 /* > and R and T assume one of the forms: */
 /* > */
 /* > if N >= M,  R = ( R11 ) M  ,   or if N < M,  R = ( R11  R12 ) N, */
 /* >                 (  0  ) N-M                         N   M-N */
 /* >                    M */
 /* > */
 /* > where R11 is upper triangular, and */
 /* > */
 /* > if N <= P,  T = ( 0  T12 ) N,   or if N > P,  T = ( T11 ) N-P, */
 /* >                  P-N  N                           ( T21 ) P */
 /* >                                                      P */
 /* > */
 /* > where T12 or T21 is upper triangular. */
 /* > */
 /* > In particular, if B is square and nonsingular, the GQR factorization */
 /* > of A and B implicitly gives the QR factorization of inv(B)*A: */
 /* > */
 /* >              inv(B)*A = Z**H * (inv(T)*R) */
 /* > */
 /* > where inv(B) denotes the inverse of the matrix B, and Z**H denotes the */
 /* > conjugate transpose of matrix Z. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of rows of the matrices A and B. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of columns of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] P */
 /* > \verbatim */
 /* >          P is INTEGER */
 /* >          The number of columns of the matrix B.  P >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,M) */
 /* >          On entry, the N-by-M matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the f2cmin(N,M)-by-M upper trapezoidal matrix R (R is */
 /* >          upper triangular if N >= M); the elements below the diagonal, */
 /* >          with the array TAUA, represent the unitary matrix Q as a */
 /* >          product of f2cmin(N,M) elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUA */
 /* > \verbatim */
 /* >          TAUA is COMPLEX*16 array, dimension (f2cmin(N,M)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the unitary matrix Q (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,P) */
 /* >          On entry, the N-by-P matrix B. */
 /* >          On exit, if N <= P, the upper triangle of the subarray */
 /* >          B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */
 /* >          if N > P, the elements on and above the (N-P)-th subdiagonal */
 /* >          contain the N-by-P upper trapezoidal matrix T; the remaining */
 /* >          elements, with the array TAUB, represent the unitary */
 /* >          matrix Z as a product of elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUB */
 /* > \verbatim */
 /* >          TAUB is COMPLEX*16 array, dimension (f2cmin(N,P)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the unitary matrix Z (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. LWORK >= f2cmax(1,N,M,P). */
 /* >          For optimum performance LWORK >= f2cmax(N,M,P)*f2cmax(NB1,NB2,NB3), */
 /* >          where NB1 is the optimal blocksize for the QR factorization */
 /* >          of an N-by-M matrix, NB2 is the optimal blocksize for the */
 /* >          RQ factorization of an N-by-P matrix, and NB3 is the optimal */
 /* >          blocksize for a call of ZUNMQR. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >           = 0:  successful exit */
 /* >           < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16OTHERcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(n,m). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - taua * v * v**H */
 /* > */
 /* >  where taua is a complex scalar, and v is a complex vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), */
 /* >  and taua in TAUA(i). */
 /* >  To form Q explicitly, use LAPACK subroutine ZUNGQR. */
 /* >  To use Q to update another matrix, use LAPACK subroutine ZUNMQR. */
 /* > */
 /* >  The matrix Z is represented as a product of elementary reflectors */
 /* > */
 /* >     Z = H(1) H(2) . . . H(k), where k = f2cmin(n,p). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - taub * v * v**H */
 /* > */
 /* >  where taub is a complex scalar, and v is a complex vector with */
 /* >  v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in */
 /* >  B(n-k+i,1:p-k+i-1), and taub in TAUB(i). */
 /* >  To form Z explicitly, use LAPACK subroutine ZUNGRQ. */
 /* >  To use Z to update another matrix, use LAPACK subroutine ZUNMRQ. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zggqrf_(integer *n, integer *m, integer *p, 
 	doublecomplex *a, integer *lda, doublecomplex *taua, doublecomplex *b,
 	 integer *ldb, doublecomplex *taub, doublecomplex *work, integer *
 	lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;

    /* Local variables */
    integer lopt, nb;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int zgeqrf_(integer *, integer *, doublecomplex *,
 	     integer *, doublecomplex *, doublecomplex *, integer *, integer *
 	    ), zgerqf_(integer *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, integer *);
    integer nb1, nb2, nb3, lwkopt;
    logical lquery;
    extern /* Subroutine */ int zunmqr_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, integer *);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --taua;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --taub;
    --work;

    /* Function Body */
    *info = 0;
    nb1 = ilaenv_(&c__1, "ZGEQRF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);
    nb2 = ilaenv_(&c__1, "ZGERQF", " ", n, p, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);
    nb3 = ilaenv_(&c__1, "ZUNMQR", " ", n, m, p, &c_n1, (ftnlen)6, (ftnlen)1);
 /* Computing MAX */
    i__1 = f2cmax(nb1,nb2);
    nb = f2cmax(i__1,nb3);
 /* Computing MAX */
    i__1 = f2cmax(*n,*m);
    lwkopt = f2cmax(i__1,*p) * nb;
    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
    lquery = *lwork == -1;
    if (*n < 0) {
 	*info = -1;
    } else if (*m < 0) {
 	*info = -2;
    } else if (*p < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = f2cmax(1,*n), i__1 = f2cmax(i__1,*m);
 	if (*lwork < f2cmax(i__1,*p) && ! lquery) {
 	    *info = -11;
 	}
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGGQRF", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     QR factorization of N-by-M matrix A: A = Q*R */

    zgeqrf_(n, m, &a[a_offset], lda, &taua[1], &work[1], lwork, info);
    lopt = (integer) work[1].r;

 /*     Update B := Q**H*B. */

    i__1 = f2cmin(*n,*m);
    zunmqr_("Left", "Conjugate Transpose", n, p, &i__1, &a[a_offset], lda, &
 	    taua[1], &b[b_offset], ldb, &work[1], lwork, info);
 /* Computing MAX */
    i__1 = lopt, i__2 = (integer) work[1].r;
    lopt = f2cmax(i__1,i__2);

 /*     RQ factorization of N-by-P matrix B: B = T*Z. */

    zgerqf_(n, p, &b[b_offset], ldb, &taub[1], &work[1], lwork, info);
 /* Computing MAX */
    i__2 = lopt, i__3 = (integer) work[1].r;
    i__1 = f2cmax(i__2,i__3);
    work[1].r = (doublereal) i__1, work[1].i = 0.;

    return 0;

 /*     End of ZGGQRF */

 } /* zggqrf_ */

--- a/lapack-netlib/SRC/zggrqf.c
+++ b/lapack-netlib/SRC/zggrqf.c
@@ -0,0 +1,721 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b ZGGRQF */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGGRQF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zggrqf.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zggrqf.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zggrqf.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, */
 /*                          LWORK, INFO ) */

 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, P */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), */
 /*      $                   WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGGRQF computes a generalized RQ factorization of an M-by-N matrix A */
 /* > and a P-by-N matrix B: */
 /* > */
 /* >             A = R*Q,        B = Z*T*Q, */
 /* > */
 /* > where Q is an N-by-N unitary matrix, Z is a P-by-P unitary */
 /* > matrix, and R and T assume one of the forms: */
 /* > */
 /* > if M <= N,  R = ( 0  R12 ) M,   or if M > N,  R = ( R11 ) M-N, */
 /* >                  N-M  M                           ( R21 ) N */
 /* >                                                      N */
 /* > */
 /* > where R12 or R21 is upper triangular, and */
 /* > */
 /* > if P >= N,  T = ( T11 ) N  ,   or if P < N,  T = ( T11  T12 ) P, */
 /* >                 (  0  ) P-N                         P   N-P */
 /* >                    N */
 /* > */
 /* > where T11 is upper triangular. */
 /* > */
 /* > In particular, if B is square and nonsingular, the GRQ factorization */
 /* > of A and B implicitly gives the RQ factorization of A*inv(B): */
 /* > */
 /* >              A*inv(B) = (R*inv(T))*Z**H */
 /* > */
 /* > where inv(B) denotes the inverse of the matrix B, and Z**H denotes the */
 /* > conjugate transpose of the matrix Z. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] P */
 /* > \verbatim */
 /* >          P is INTEGER */
 /* >          The number of rows of the matrix B.  P >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrices A and B. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, if M <= N, the upper triangle of the subarray */
 /* >          A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; */
 /* >          if M > N, the elements on and above the (M-N)-th subdiagonal */
 /* >          contain the M-by-N upper trapezoidal matrix R; the remaining */
 /* >          elements, with the array TAUA, represent the unitary */
 /* >          matrix Q as a product of elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUA */
 /* > \verbatim */
 /* >          TAUA is COMPLEX*16 array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the unitary matrix Q (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,N) */
 /* >          On entry, the P-by-N matrix B. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the f2cmin(P,N)-by-N upper trapezoidal matrix T (T is */
 /* >          upper triangular if P >= N); the elements below the diagonal, */
 /* >          with the array TAUB, represent the unitary matrix Z as a */
 /* >          product of elementary reflectors (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= f2cmax(1,P). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUB */
 /* > \verbatim */
 /* >          TAUB is COMPLEX*16 array, dimension (f2cmin(P,N)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the unitary matrix Z (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. LWORK >= f2cmax(1,N,M,P). */
 /* >          For optimum performance LWORK >= f2cmax(N,M,P)*f2cmax(NB1,NB2,NB3), */
 /* >          where NB1 is the optimal blocksize for the RQ factorization */
 /* >          of an M-by-N matrix, NB2 is the optimal blocksize for the */
 /* >          QR factorization of a P-by-N matrix, and NB3 is the optimal */
 /* >          blocksize for a call of ZUNMRQ. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO=-i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16OTHERcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - taua * v * v**H */
 /* > */
 /* >  where taua is a complex scalar, and v is a complex vector with */
 /* >  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */
 /* >  A(m-k+i,1:n-k+i-1), and taua in TAUA(i). */
 /* >  To form Q explicitly, use LAPACK subroutine ZUNGRQ. */
 /* >  To use Q to update another matrix, use LAPACK subroutine ZUNMRQ. */
 /* > */
 /* >  The matrix Z is represented as a product of elementary reflectors */
 /* > */
 /* >     Z = H(1) H(2) . . . H(k), where k = f2cmin(p,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - taub * v * v**H */
 /* > */
 /* >  where taub is a complex scalar, and v is a complex vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), */
 /* >  and taub in TAUB(i). */
 /* >  To form Z explicitly, use LAPACK subroutine ZUNGQR. */
 /* >  To use Z to update another matrix, use LAPACK subroutine ZUNMQR. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zggrqf_(integer *m, integer *p, integer *n, 
 	doublecomplex *a, integer *lda, doublecomplex *taua, doublecomplex *b,
 	 integer *ldb, doublecomplex *taub, doublecomplex *work, integer *
 	lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;

    /* Local variables */
    integer lopt, nb;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int zgeqrf_(integer *, integer *, doublecomplex *,
 	     integer *, doublecomplex *, doublecomplex *, integer *, integer *
 	    ), zgerqf_(integer *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, integer *);
    integer nb1, nb2, nb3, lwkopt;
    logical lquery;
    extern /* Subroutine */ int zunmrq_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, integer *);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --taua;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --taub;
    --work;

    /* Function Body */
    *info = 0;
    nb1 = ilaenv_(&c__1, "ZGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);
    nb2 = ilaenv_(&c__1, "ZGEQRF", " ", p, n, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);
    nb3 = ilaenv_(&c__1, "ZUNMRQ", " ", m, n, p, &c_n1, (ftnlen)6, (ftnlen)1);
 /* Computing MAX */
    i__1 = f2cmax(nb1,nb2);
    nb = f2cmax(i__1,nb3);
 /* Computing MAX */
    i__1 = f2cmax(*n,*m);
    lwkopt = f2cmax(i__1,*p) * nb;
    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*p < 0) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*p)) {
 	*info = -8;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = f2cmax(1,*m), i__1 = f2cmax(i__1,*p);
 	if (*lwork < f2cmax(i__1,*n) && ! lquery) {
 	    *info = -11;
 	}
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGGRQF", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     RQ factorization of M-by-N matrix A: A = R*Q */

    zgerqf_(m, n, &a[a_offset], lda, &taua[1], &work[1], lwork, info);
    lopt = (integer) work[1].r;

 /*     Update B := B*Q**H */

    i__1 = f2cmin(*m,*n);
 /* Computing MAX */
    i__2 = 1, i__3 = *m - *n + 1;
    zunmrq_("Right", "Conjugate Transpose", p, n, &i__1, &a[f2cmax(i__2,i__3) + 
 	    a_dim1], lda, &taua[1], &b[b_offset], ldb, &work[1], lwork, info);
 /* Computing MAX */
    i__1 = lopt, i__2 = (integer) work[1].r;
    lopt = f2cmax(i__1,i__2);

 /*     QR factorization of P-by-N matrix B: B = Z*T */

    zgeqrf_(p, n, &b[b_offset], ldb, &taub[1], &work[1], lwork, info);
 /* Computing MAX */
    i__2 = lopt, i__3 = (integer) work[1].r;
    i__1 = f2cmax(i__2,i__3);
    work[1].r = (doublereal) i__1, work[1].i = 0.;

    return 0;

 /*     End of ZGGRQF */

 } /* zggrqf_ */

--- a/lapack-netlib/SRC/zggsvd3.c
+++ b/lapack-netlib/SRC/zggsvd3.c
@@ -0,0 +1,947 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c_n1 = -1;
 static integer c__1 = 1;

 /* > \brief <b> ZGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGGSVD3 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zggsvd3
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zggsvd3
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zggsvd3
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGGSVD3( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, */
 /*                           LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, */
 /*                           LWORK, RWORK, IWORK, INFO ) */

 /*       CHARACTER          JOBQ, JOBU, JOBV */
 /*       INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK */
 /*       INTEGER            IWORK( * ) */
 /*       DOUBLE PRECISION   ALPHA( * ), BETA( * ), RWORK( * ) */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */
 /*      $                   U( LDU, * ), V( LDV, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGGSVD3 computes the generalized singular value decomposition (GSVD) */
 /* > of an M-by-N complex matrix A and P-by-N complex matrix B: */
 /* > */
 /* >       U**H*A*Q = D1*( 0 R ),    V**H*B*Q = D2*( 0 R ) */
 /* > */
 /* > where U, V and Q are unitary matrices. */
 /* > Let K+L = the effective numerical rank of the */
 /* > matrix (A**H,B**H)**H, then R is a (K+L)-by-(K+L) nonsingular upper */
 /* > triangular matrix, D1 and D2 are M-by-(K+L) and P-by-(K+L) "diagonal" */
 /* > matrices and of the following structures, respectively: */
 /* > */
 /* > If M-K-L >= 0, */
 /* > */
 /* >                     K  L */
 /* >        D1 =     K ( I  0 ) */
 /* >                 L ( 0  C ) */
 /* >             M-K-L ( 0  0 ) */
 /* > */
 /* >                   K  L */
 /* >        D2 =   L ( 0  S ) */
 /* >             P-L ( 0  0 ) */
 /* > */
 /* >                 N-K-L  K    L */
 /* >   ( 0 R ) = K (  0   R11  R12 ) */
 /* >             L (  0    0   R22 ) */
 /* > where */
 /* > */
 /* >   C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), */
 /* >   S = diag( BETA(K+1),  ... , BETA(K+L) ), */
 /* >   C**2 + S**2 = I. */
 /* > */
 /* >   R is stored in A(1:K+L,N-K-L+1:N) on exit. */
 /* > */
 /* > If M-K-L < 0, */
 /* > */
 /* >                   K M-K K+L-M */
 /* >        D1 =   K ( I  0    0   ) */
 /* >             M-K ( 0  C    0   ) */
 /* > */
 /* >                     K M-K K+L-M */
 /* >        D2 =   M-K ( 0  S    0  ) */
 /* >             K+L-M ( 0  0    I  ) */
 /* >               P-L ( 0  0    0  ) */
 /* > */
 /* >                    N-K-L  K   M-K  K+L-M */
 /* >   ( 0 R ) =     K ( 0    R11  R12  R13  ) */
 /* >               M-K ( 0     0   R22  R23  ) */
 /* >             K+L-M ( 0     0    0   R33  ) */
 /* > */
 /* > where */
 /* > */
 /* >   C = diag( ALPHA(K+1), ... , ALPHA(M) ), */
 /* >   S = diag( BETA(K+1),  ... , BETA(M) ), */
 /* >   C**2 + S**2 = I. */
 /* > */
 /* >   (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored */
 /* >   ( 0  R22 R23 ) */
 /* >   in B(M-K+1:L,N+M-K-L+1:N) on exit. */
 /* > */
 /* > The routine computes C, S, R, and optionally the unitary */
 /* > transformation matrices U, V and Q. */
 /* > */
 /* > In particular, if B is an N-by-N nonsingular matrix, then the GSVD of */
 /* > A and B implicitly gives the SVD of A*inv(B): */
 /* >                      A*inv(B) = U*(D1*inv(D2))*V**H. */
 /* > If ( A**H,B**H)**H has orthonormal columns, then the GSVD of A and B is also */
 /* > equal to the CS decomposition of A and B. Furthermore, the GSVD can */
 /* > be used to derive the solution of the eigenvalue problem: */
 /* >                      A**H*A x = lambda* B**H*B x. */
 /* > In some literature, the GSVD of A and B is presented in the form */
 /* >                  U**H*A*X = ( 0 D1 ),   V**H*B*X = ( 0 D2 ) */
 /* > where U and V are orthogonal and X is nonsingular, and D1 and D2 are */
 /* > ``diagonal''.  The former GSVD form can be converted to the latter */
 /* > form by taking the nonsingular matrix X as */
 /* > */
 /* >                       X = Q*(  I   0    ) */
 /* >                             (  0 inv(R) ) */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBU */
 /* > \verbatim */
 /* >          JOBU is CHARACTER*1 */
 /* >          = 'U':  Unitary matrix U is computed; */
 /* >          = 'N':  U is not computed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JOBV */
 /* > \verbatim */
 /* >          JOBV is CHARACTER*1 */
 /* >          = 'V':  Unitary matrix V is computed; */
 /* >          = 'N':  V is not computed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JOBQ */
 /* > \verbatim */
 /* >          JOBQ is CHARACTER*1 */
 /* >          = 'Q':  Unitary matrix Q is computed; */
 /* >          = 'N':  Q is not computed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] P */
 /* > \verbatim */
 /* >          P is INTEGER */
 /* >          The number of rows of the matrix B.  P >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] K */
 /* > \verbatim */
 /* >          K is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[out] L */
 /* > \verbatim */
 /* >          L is INTEGER */
 /* > */
 /* >          On exit, K and L specify the dimension of the subblocks */
 /* >          described in Purpose. */
 /* >          K + L = effective numerical rank of (A**H,B**H)**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, A contains the triangular matrix R, or part of R. */
 /* >          See Purpose for details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,N) */
 /* >          On entry, the P-by-N matrix B. */
 /* >          On exit, B contains part of the triangular matrix R if */
 /* >          M-K-L < 0.  See Purpose for details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= f2cmax(1,P). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] ALPHA */
 /* > \verbatim */
 /* >          ALPHA is DOUBLE PRECISION array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] BETA */
 /* > \verbatim */
 /* >          BETA is DOUBLE PRECISION array, dimension (N) */
 /* > */
 /* >          On exit, ALPHA and BETA contain the generalized singular */
 /* >          value pairs of A and B; */
 /* >            ALPHA(1:K) = 1, */
 /* >            BETA(1:K)  = 0, */
 /* >          and if M-K-L >= 0, */
 /* >            ALPHA(K+1:K+L) = C, */
 /* >            BETA(K+1:K+L)  = S, */
 /* >          or if M-K-L < 0, */
 /* >            ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 */
 /* >            BETA(K+1:M) =S, BETA(M+1:K+L) =1 */
 /* >          and */
 /* >            ALPHA(K+L+1:N) = 0 */
 /* >            BETA(K+L+1:N)  = 0 */
 /* > \endverbatim */
 /* > */
 /* > \param[out] U */
 /* > \verbatim */
 /* >          U is COMPLEX*16 array, dimension (LDU,M) */
 /* >          If JOBU = 'U', U contains the M-by-M unitary matrix U. */
 /* >          If JOBU = 'N', U is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDU */
 /* > \verbatim */
 /* >          LDU is INTEGER */
 /* >          The leading dimension of the array U. LDU >= f2cmax(1,M) if */
 /* >          JOBU = 'U'; LDU >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] V */
 /* > \verbatim */
 /* >          V is COMPLEX*16 array, dimension (LDV,P) */
 /* >          If JOBV = 'V', V contains the P-by-P unitary matrix V. */
 /* >          If JOBV = 'N', V is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDV */
 /* > \verbatim */
 /* >          LDV is INTEGER */
 /* >          The leading dimension of the array V. LDV >= f2cmax(1,P) if */
 /* >          JOBV = 'V'; LDV >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Q */
 /* > \verbatim */
 /* >          Q is COMPLEX*16 array, dimension (LDQ,N) */
 /* >          If JOBQ = 'Q', Q contains the N-by-N unitary matrix Q. */
 /* >          If JOBQ = 'N', Q is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDQ */
 /* > \verbatim */
 /* >          LDQ is INTEGER */
 /* >          The leading dimension of the array Q. LDQ >= f2cmax(1,N) if */
 /* >          JOBQ = 'Q'; LDQ >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (N) */
 /* >          On exit, IWORK stores the sorting information. More */
 /* >          precisely, the following loop will sort ALPHA */
 /* >             for I = K+1, f2cmin(M,K+L) */
 /* >                 swap ALPHA(I) and ALPHA(IWORK(I)) */
 /* >             endfor */
 /* >          such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          > 0:  if INFO = 1, the Jacobi-type procedure failed to */
 /* >                converge.  For further details, see subroutine ZTGSJA. */
 /* > \endverbatim */

 /* > \par Internal Parameters: */
 /*  ========================= */
 /* > */
 /* > \verbatim */
 /* >  TOLA    DOUBLE PRECISION */
 /* >  TOLB    DOUBLE PRECISION */
 /* >          TOLA and TOLB are the thresholds to determine the effective */
 /* >          rank of (A**H,B**H)**H. Generally, they are set to */
 /* >                   TOLA = MAX(M,N)*norm(A)*MACHEPS, */
 /* >                   TOLB = MAX(P,N)*norm(B)*MACHEPS. */
 /* >          The size of TOLA and TOLB may affect the size of backward */
 /* >          errors of the decomposition. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date August 2015 */

 /* > \ingroup complex16GEsing */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Ming Gu and Huan Ren, Computer Science Division, University of */
 /* >     California at Berkeley, USA */
 /* > */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* >  ZGGSVD3 replaces the deprecated subroutine ZGGSVD. */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zggsvd3_(char *jobu, char *jobv, char *jobq, integer *m, 
 	integer *n, integer *p, integer *k, integer *l, doublecomplex *a, 
 	integer *lda, doublecomplex *b, integer *ldb, doublereal *alpha, 
 	doublereal *beta, doublecomplex *u, integer *ldu, doublecomplex *v, 
 	integer *ldv, doublecomplex *q, integer *ldq, doublecomplex *work, 
 	integer *lwork, doublereal *rwork, integer *iwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, 
 	    u_offset, v_dim1, v_offset, i__1, i__2;
    doublecomplex z__1;

    /* Local variables */
    integer ibnd;
    doublereal tola;
    integer isub;
    doublereal tolb, unfl, temp, smax;
    integer ncallmycycle, i__, j;
    extern logical lsame_(char *, char *);
    doublereal anorm, bnorm;
    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
 	    doublereal *, integer *);
    logical wantq, wantu, wantv;
    extern doublereal dlamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, 
 	    integer *, doublereal *);
    extern /* Subroutine */ int ztgsja_(char *, char *, char *, integer *, 
 	    integer *, integer *, integer *, integer *, doublecomplex *, 
 	    integer *, doublecomplex *, integer *, doublereal *, doublereal *,
 	     doublereal *, doublereal *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, integer *);
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int zggsvp3_(char *, char *, char *, integer *, 
 	    integer *, integer *, doublecomplex *, integer *, doublecomplex *,
 	     integer *, doublereal *, doublereal *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, integer *, doublereal *, 
 	    doublecomplex *, doublecomplex *, integer *, integer *);
    doublereal ulp;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     August 2015 */


 /*  ===================================================================== */


 /*     Decode and test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --alpha;
    --beta;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1 * 1;
    v -= v_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    wantu = lsame_(jobu, "U");
    wantv = lsame_(jobv, "V");
    wantq = lsame_(jobq, "Q");
    lquery = *lwork == -1;
    lwkopt = 1;

 /*     Test the input arguments */

    *info = 0;
    if (! (wantu || lsame_(jobu, "N"))) {
 	*info = -1;
    } else if (! (wantv || lsame_(jobv, "N"))) {
 	*info = -2;
    } else if (! (wantq || lsame_(jobq, "N"))) {
 	*info = -3;
    } else if (*m < 0) {
 	*info = -4;
    } else if (*n < 0) {
 	*info = -5;
    } else if (*p < 0) {
 	*info = -6;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -10;
    } else if (*ldb < f2cmax(1,*p)) {
 	*info = -12;
    } else if (*ldu < 1 || wantu && *ldu < *m) {
 	*info = -16;
    } else if (*ldv < 1 || wantv && *ldv < *p) {
 	*info = -18;
    } else if (*ldq < 1 || wantq && *ldq < *n) {
 	*info = -20;
    } else if (*lwork < 1 && ! lquery) {
 	*info = -24;
    }

 /*     Compute workspace */

    if (*info == 0) {
 	zggsvp3_(jobu, jobv, jobq, m, p, n, &a[a_offset], lda, &b[b_offset], 
 		ldb, &tola, &tolb, k, l, &u[u_offset], ldu, &v[v_offset], ldv,
 		 &q[q_offset], ldq, &iwork[1], &rwork[1], &work[1], &work[1], 
 		&c_n1, info);
 	lwkopt = *n + (integer) work[1].r;
 /* Computing MAX */
 	i__1 = *n << 1;
 	lwkopt = f2cmax(i__1,lwkopt);
 	lwkopt = f2cmax(1,lwkopt);
 	z__1.r = (doublereal) lwkopt, z__1.i = 0.;
 	work[1].r = z__1.r, work[1].i = z__1.i;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGGSVD3", &i__1, (ftnlen)7);
 	return 0;
    }
    if (lquery) {
 	return 0;
    }

 /*     Compute the Frobenius norm of matrices A and B */

    anorm = zlange_("1", m, n, &a[a_offset], lda, &rwork[1]);
    bnorm = zlange_("1", p, n, &b[b_offset], ldb, &rwork[1]);

 /*     Get machine precision and set up threshold for determining */
 /*     the effective numerical rank of the matrices A and B. */

    ulp = dlamch_("Precision");
    unfl = dlamch_("Safe Minimum");
    tola = f2cmax(*m,*n) * f2cmax(anorm,unfl) * ulp;
    tolb = f2cmax(*p,*n) * f2cmax(bnorm,unfl) * ulp;

    i__1 = *lwork - *n;
    zggsvp3_(jobu, jobv, jobq, m, p, n, &a[a_offset], lda, &b[b_offset], ldb, 
 	    &tola, &tolb, k, l, &u[u_offset], ldu, &v[v_offset], ldv, &q[
 	    q_offset], ldq, &iwork[1], &rwork[1], &work[1], &work[*n + 1], &
 	    i__1, info);

 /*     Compute the GSVD of two upper "triangular" matrices */

    ztgsja_(jobu, jobv, jobq, m, p, n, k, l, &a[a_offset], lda, &b[b_offset], 
 	    ldb, &tola, &tolb, &alpha[1], &beta[1], &u[u_offset], ldu, &v[
 	    v_offset], ldv, &q[q_offset], ldq, &work[1], &ncallmycycle, info);

 /*     Sort the singular values and store the pivot indices in IWORK */
 /*     Copy ALPHA to RWORK, then sort ALPHA in RWORK */

    dcopy_(n, &alpha[1], &c__1, &rwork[1], &c__1);
 /* Computing MIN */
    i__1 = *l, i__2 = *m - *k;
    ibnd = f2cmin(i__1,i__2);
    i__1 = ibnd;
    for (i__ = 1; i__ <= i__1; ++i__) {

 /*        Scan for largest ALPHA(K+I) */

 	isub = i__;
 	smax = rwork[*k + i__];
 	i__2 = ibnd;
 	for (j = i__ + 1; j <= i__2; ++j) {
 	    temp = rwork[*k + j];
 	    if (temp > smax) {
 		isub = j;
 		smax = temp;
 	    }
 /* L10: */
 	}
 	if (isub != i__) {
 	    rwork[*k + isub] = rwork[*k + i__];
 	    rwork[*k + i__] = smax;
 	    iwork[*k + i__] = *k + isub;
 	} else {
 	    iwork[*k + i__] = *k + i__;
 	}
 /* L20: */
    }

    z__1.r = (doublereal) lwkopt, z__1.i = 0.;
    work[1].r = z__1.r, work[1].i = z__1.i;
    return 0;

 /*     End of ZGGSVD3 */

 } /* zggsvd3_ */

--- a/lapack-netlib/SRC/zggsvp3.c
+++ b/lapack-netlib/SRC/zggsvp3.c
--- a/lapack-netlib/SRC/zgsvj0.c
+++ b/lapack-netlib/SRC/zgsvj0.c
--- a/lapack-netlib/SRC/zgsvj1.c
+++ b/lapack-netlib/SRC/zgsvj1.c
--- a/lapack-netlib/SRC/zgtcon.c
+++ b/lapack-netlib/SRC/zgtcon.c
@@ -0,0 +1,647 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b ZGTCON */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGTCON + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgtcon.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgtcon.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgtcon.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, */
 /*                          WORK, INFO ) */

 /*       CHARACTER          NORM */
 /*       INTEGER            INFO, N */
 /*       DOUBLE PRECISION   ANORM, RCOND */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX*16         D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGTCON estimates the reciprocal of the condition number of a complex */
 /* > tridiagonal matrix A using the LU factorization as computed by */
 /* > ZGTTRF. */
 /* > */
 /* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */
 /* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] NORM */
 /* > \verbatim */
 /* >          NORM is CHARACTER*1 */
 /* >          Specifies whether the 1-norm condition number or the */
 /* >          infinity-norm condition number is required: */
 /* >          = '1' or 'O':  1-norm; */
 /* >          = 'I':         Infinity-norm. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DL */
 /* > \verbatim */
 /* >          DL is COMPLEX*16 array, dimension (N-1) */
 /* >          The (n-1) multipliers that define the matrix L from the */
 /* >          LU factorization of A as computed by ZGTTRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] D */
 /* > \verbatim */
 /* >          D is COMPLEX*16 array, dimension (N) */
 /* >          The n diagonal elements of the upper triangular matrix U from */
 /* >          the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU */
 /* > \verbatim */
 /* >          DU is COMPLEX*16 array, dimension (N-1) */
 /* >          The (n-1) elements of the first superdiagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU2 */
 /* > \verbatim */
 /* >          DU2 is COMPLEX*16 array, dimension (N-2) */
 /* >          The (n-2) elements of the second superdiagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices; for 1 <= i <= n, row i of the matrix was */
 /* >          interchanged with row IPIV(i).  IPIV(i) will always be either */
 /* >          i or i+1; IPIV(i) = i indicates a row interchange was not */
 /* >          required. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ANORM */
 /* > \verbatim */
 /* >          ANORM is DOUBLE PRECISION */
 /* >          If NORM = '1' or 'O', the 1-norm of the original matrix A. */
 /* >          If NORM = 'I', the infinity-norm of the original matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is DOUBLE PRECISION */
 /* >          The reciprocal of the condition number of the matrix A, */
 /* >          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */
 /* >          estimate of the 1-norm of inv(A) computed in this routine. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16GTcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int zgtcon_(char *norm, integer *n, doublecomplex *dl, 
 	doublecomplex *d__, doublecomplex *du, doublecomplex *du2, integer *
 	ipiv, doublereal *anorm, doublereal *rcond, doublecomplex *work, 
 	integer *info)
 {
    /* System generated locals */
    integer i__1, i__2;

    /* Local variables */
    integer kase, kase1, i__;
    extern logical lsame_(char *, char *);
    integer isave[3];
    extern /* Subroutine */ int zlacn2_(integer *, doublecomplex *, 
 	    doublecomplex *, doublereal *, integer *, integer *), xerbla_(
 	    char *, integer *, ftnlen);
    doublereal ainvnm;
    logical onenrm;
    extern /* Subroutine */ int zgttrs_(char *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *
 	    , integer *, doublecomplex *, integer *, integer *);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input arguments. */

    /* Parameter adjustments */
    --work;
    --ipiv;
    --du2;
    --du;
    --d__;
    --dl;

    /* Function Body */
    *info = 0;
    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
    if (! onenrm && ! lsame_(norm, "I")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*anorm < 0.) {
 	*info = -8;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGTCON", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    *rcond = 0.;
    if (*n == 0) {
 	*rcond = 1.;
 	return 0;
    } else if (*anorm == 0.) {
 	return 0;
    }

 /*     Check that D(1:N) is non-zero. */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	i__2 = i__;
 	if (d__[i__2].r == 0. && d__[i__2].i == 0.) {
 	    return 0;
 	}
 /* L10: */
    }

    ainvnm = 0.;
    if (onenrm) {
 	kase1 = 1;
    } else {
 	kase1 = 2;
    }
    kase = 0;
 L20:
    zlacn2_(n, &work[*n + 1], &work[1], &ainvnm, &kase, isave);
    if (kase != 0) {
 	if (kase == kase1) {

 /*           Multiply by inv(U)*inv(L). */

 	    zgttrs_("No transpose", n, &c__1, &dl[1], &d__[1], &du[1], &du2[1]
 		    , &ipiv[1], &work[1], n, info);
 	} else {

 /*           Multiply by inv(L**H)*inv(U**H). */

 	    zgttrs_("Conjugate transpose", n, &c__1, &dl[1], &d__[1], &du[1], 
 		    &du2[1], &ipiv[1], &work[1], n, info);
 	}
 	goto L20;
    }

 /*     Compute the estimate of the reciprocal condition number. */

    if (ainvnm != 0.) {
 	*rcond = 1. / ainvnm / *anorm;
    }

    return 0;

 /*     End of ZGTCON */

 } /* zgtcon_ */

--- a/lapack-netlib/SRC/zgtrfs.c
+++ b/lapack-netlib/SRC/zgtrfs.c
--- a/lapack-netlib/SRC/zgtsv.c
+++ b/lapack-netlib/SRC/zgtsv.c
@@ -0,0 +1,711 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief <b> ZGTSV computes the solution to system of linear equations A * X = B for GT matrices </b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGTSV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgtsv.f
 "> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgtsv.f
 "> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgtsv.f
 "> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) */

 /*       INTEGER            INFO, LDB, N, NRHS */
 /*       COMPLEX*16         B( LDB, * ), D( * ), DL( * ), DU( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGTSV  solves the equation */
 /* > */
 /* >    A*X = B, */
 /* > */
 /* > where A is an N-by-N tridiagonal matrix, by Gaussian elimination with */
 /* > partial pivoting. */
 /* > */
 /* > Note that the equation  A**T *X = B  may be solved by interchanging the */
 /* > order of the arguments DU and DL. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DL */
 /* > \verbatim */
 /* >          DL is COMPLEX*16 array, dimension (N-1) */
 /* >          On entry, DL must contain the (n-1) subdiagonal elements of */
 /* >          A. */
 /* >          On exit, DL is overwritten by the (n-2) elements of the */
 /* >          second superdiagonal of the upper triangular matrix U from */
 /* >          the LU factorization of A, in DL(1), ..., DL(n-2). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] D */
 /* > \verbatim */
 /* >          D is COMPLEX*16 array, dimension (N) */
 /* >          On entry, D must contain the diagonal elements of A. */
 /* >          On exit, D is overwritten by the n diagonal elements of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DU */
 /* > \verbatim */
 /* >          DU is COMPLEX*16 array, dimension (N-1) */
 /* >          On entry, DU must contain the (n-1) superdiagonal elements */
 /* >          of A. */
 /* >          On exit, DU is overwritten by the (n-1) elements of the first */
 /* >          superdiagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,NRHS) */
 /* >          On entry, the N-by-NRHS right hand side matrix B. */
 /* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, U(i,i) is exactly zero, and the solution */
 /* >                has not been computed.  The factorization has not been */
 /* >                completed unless i = N. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16GTsolve */

 /*  ===================================================================== */
 /* Subroutine */ int zgtsv_(integer *n, integer *nrhs, doublecomplex *dl, 
 	doublecomplex *d__, doublecomplex *du, doublecomplex *b, integer *ldb,
 	 integer *info)
 {
    /* System generated locals */
    integer b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
    doublereal d__1, d__2, d__3, d__4;
    doublecomplex z__1, z__2, z__3, z__4, z__5;

    /* Local variables */
    doublecomplex temp, mult;
    integer j, k;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


    /* Parameter adjustments */
    --dl;
    --d__;
    --du;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    if (*n < 0) {
 	*info = -1;
    } else if (*nrhs < 0) {
 	*info = -2;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGTSV ", &i__1, (ftnlen)6);
 	return 0;
    }

    if (*n == 0) {
 	return 0;
    }

    i__1 = *n - 1;
    for (k = 1; k <= i__1; ++k) {
 	i__2 = k;
 	if (dl[i__2].r == 0. && dl[i__2].i == 0.) {

 /*           Subdiagonal is zero, no elimination is required. */

 	    i__2 = k;
 	    if (d__[i__2].r == 0. && d__[i__2].i == 0.) {

 /*              Diagonal is zero: set INFO = K and return; a unique */
 /*              solution can not be found. */

 		*info = k;
 		return 0;
 	    }
 	} else /* if(complicated condition) */ {
 	    i__2 = k;
 	    i__3 = k;
 	    if ((d__1 = d__[i__2].r, abs(d__1)) + (d__2 = d_imag(&d__[k]), 
 		    abs(d__2)) >= (d__3 = dl[i__3].r, abs(d__3)) + (d__4 = 
 		    d_imag(&dl[k]), abs(d__4))) {

 /*           No row interchange required */

 		z_div(&z__1, &dl[k], &d__[k]);
 		mult.r = z__1.r, mult.i = z__1.i;
 		i__2 = k + 1;
 		i__3 = k + 1;
 		i__4 = k;
 		z__2.r = mult.r * du[i__4].r - mult.i * du[i__4].i, z__2.i = 
 			mult.r * du[i__4].i + mult.i * du[i__4].r;
 		z__1.r = d__[i__3].r - z__2.r, z__1.i = d__[i__3].i - z__2.i;
 		d__[i__2].r = z__1.r, d__[i__2].i = z__1.i;
 		i__2 = *nrhs;
 		for (j = 1; j <= i__2; ++j) {
 		    i__3 = k + 1 + j * b_dim1;
 		    i__4 = k + 1 + j * b_dim1;
 		    i__5 = k + j * b_dim1;
 		    z__2.r = mult.r * b[i__5].r - mult.i * b[i__5].i, z__2.i =
 			     mult.r * b[i__5].i + mult.i * b[i__5].r;
 		    z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4].i - z__2.i;
 		    b[i__3].r = z__1.r, b[i__3].i = z__1.i;
 /* L10: */
 		}
 		if (k < *n - 1) {
 		    i__2 = k;
 		    dl[i__2].r = 0., dl[i__2].i = 0.;
 		}
 	    } else {

 /*           Interchange rows K and K+1 */

 		z_div(&z__1, &d__[k], &dl[k]);
 		mult.r = z__1.r, mult.i = z__1.i;
 		i__2 = k;
 		i__3 = k;
 		d__[i__2].r = dl[i__3].r, d__[i__2].i = dl[i__3].i;
 		i__2 = k + 1;
 		temp.r = d__[i__2].r, temp.i = d__[i__2].i;
 		i__2 = k + 1;
 		i__3 = k;
 		z__2.r = mult.r * temp.r - mult.i * temp.i, z__2.i = mult.r * 
 			temp.i + mult.i * temp.r;
 		z__1.r = du[i__3].r - z__2.r, z__1.i = du[i__3].i - z__2.i;
 		d__[i__2].r = z__1.r, d__[i__2].i = z__1.i;
 		if (k < *n - 1) {
 		    i__2 = k;
 		    i__3 = k + 1;
 		    dl[i__2].r = du[i__3].r, dl[i__2].i = du[i__3].i;
 		    i__2 = k + 1;
 		    z__2.r = -mult.r, z__2.i = -mult.i;
 		    i__3 = k;
 		    z__1.r = z__2.r * dl[i__3].r - z__2.i * dl[i__3].i, 
 			    z__1.i = z__2.r * dl[i__3].i + z__2.i * dl[i__3]
 			    .r;
 		    du[i__2].r = z__1.r, du[i__2].i = z__1.i;
 		}
 		i__2 = k;
 		du[i__2].r = temp.r, du[i__2].i = temp.i;
 		i__2 = *nrhs;
 		for (j = 1; j <= i__2; ++j) {
 		    i__3 = k + j * b_dim1;
 		    temp.r = b[i__3].r, temp.i = b[i__3].i;
 		    i__3 = k + j * b_dim1;
 		    i__4 = k + 1 + j * b_dim1;
 		    b[i__3].r = b[i__4].r, b[i__3].i = b[i__4].i;
 		    i__3 = k + 1 + j * b_dim1;
 		    i__4 = k + 1 + j * b_dim1;
 		    z__2.r = mult.r * b[i__4].r - mult.i * b[i__4].i, z__2.i =
 			     mult.r * b[i__4].i + mult.i * b[i__4].r;
 		    z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i;
 		    b[i__3].r = z__1.r, b[i__3].i = z__1.i;
 /* L20: */
 		}
 	    }
 	}
 /* L30: */
    }
    i__1 = *n;
    if (d__[i__1].r == 0. && d__[i__1].i == 0.) {
 	*info = *n;
 	return 0;
    }

 /*     Back solve with the matrix U from the factorization. */

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
 	i__2 = *n + j * b_dim1;
 	z_div(&z__1, &b[*n + j * b_dim1], &d__[*n]);
 	b[i__2].r = z__1.r, b[i__2].i = z__1.i;
 	if (*n > 1) {
 	    i__2 = *n - 1 + j * b_dim1;
 	    i__3 = *n - 1 + j * b_dim1;
 	    i__4 = *n - 1;
 	    i__5 = *n + j * b_dim1;
 	    z__3.r = du[i__4].r * b[i__5].r - du[i__4].i * b[i__5].i, z__3.i =
 		     du[i__4].r * b[i__5].i + du[i__4].i * b[i__5].r;
 	    z__2.r = b[i__3].r - z__3.r, z__2.i = b[i__3].i - z__3.i;
 	    z_div(&z__1, &z__2, &d__[*n - 1]);
 	    b[i__2].r = z__1.r, b[i__2].i = z__1.i;
 	}
 	for (k = *n - 2; k >= 1; --k) {
 	    i__2 = k + j * b_dim1;
 	    i__3 = k + j * b_dim1;
 	    i__4 = k;
 	    i__5 = k + 1 + j * b_dim1;
 	    z__4.r = du[i__4].r * b[i__5].r - du[i__4].i * b[i__5].i, z__4.i =
 		     du[i__4].r * b[i__5].i + du[i__4].i * b[i__5].r;
 	    z__3.r = b[i__3].r - z__4.r, z__3.i = b[i__3].i - z__4.i;
 	    i__6 = k;
 	    i__7 = k + 2 + j * b_dim1;
 	    z__5.r = dl[i__6].r * b[i__7].r - dl[i__6].i * b[i__7].i, z__5.i =
 		     dl[i__6].r * b[i__7].i + dl[i__6].i * b[i__7].r;
 	    z__2.r = z__3.r - z__5.r, z__2.i = z__3.i - z__5.i;
 	    z_div(&z__1, &z__2, &d__[k]);
 	    b[i__2].r = z__1.r, b[i__2].i = z__1.i;
 /* L40: */
 	}
 /* L50: */
    }

    return 0;

 /*     End of ZGTSV */

 } /* zgtsv_ */

--- a/lapack-netlib/SRC/zgtsvx.c
+++ b/lapack-netlib/SRC/zgtsvx.c
@@ -0,0 +1,835 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief <b> ZGTSVX computes the solution to system of linear equations A * X = B for GT matrices </b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGTSVX + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgtsvx.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgtsvx.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgtsvx.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, */
 /*                          DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, */
 /*                          WORK, RWORK, INFO ) */

 /*       CHARACTER          FACT, TRANS */
 /*       INTEGER            INFO, LDB, LDX, N, NRHS */
 /*       DOUBLE PRECISION   RCOND */
 /*       INTEGER            IPIV( * ) */
 /*       DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * ) */
 /*       COMPLEX*16         B( LDB, * ), D( * ), DF( * ), DL( * ), */
 /*      $                   DLF( * ), DU( * ), DU2( * ), DUF( * ), */
 /*      $                   WORK( * ), X( LDX, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGTSVX uses the LU factorization to compute the solution to a complex */
 /* > system of linear equations A * X = B, A**T * X = B, or A**H * X = B, */
 /* > where A is a tridiagonal matrix of order N and X and B are N-by-NRHS */
 /* > matrices. */
 /* > */
 /* > Error bounds on the solution and a condition estimate are also */
 /* > provided. */
 /* > \endverbatim */

 /* > \par Description: */
 /*  ================= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > The following steps are performed: */
 /* > */
 /* > 1. If FACT = 'N', the LU decomposition is used to factor the matrix A */
 /* >    as A = L * U, where L is a product of permutation and unit lower */
 /* >    bidiagonal matrices and U is upper triangular with nonzeros in */
 /* >    only the main diagonal and first two superdiagonals. */
 /* > */
 /* > 2. If some U(i,i)=0, so that U is exactly singular, then the routine */
 /* >    returns with INFO = i. Otherwise, the factored form of A is used */
 /* >    to estimate the condition number of the matrix A.  If the */
 /* >    reciprocal of the condition number is less than machine precision, */
 /* >    INFO = N+1 is returned as a warning, but the routine still goes on */
 /* >    to solve for X and compute error bounds as described below. */
 /* > */
 /* > 3. The system of equations is solved for X using the factored form */
 /* >    of A. */
 /* > */
 /* > 4. Iterative refinement is applied to improve the computed solution */
 /* >    matrix and calculate error bounds and backward error estimates */
 /* >    for it. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] FACT */
 /* > \verbatim */
 /* >          FACT is CHARACTER*1 */
 /* >          Specifies whether or not the factored form of A has been */
 /* >          supplied on entry. */
 /* >          = 'F':  DLF, DF, DUF, DU2, and IPIV contain the factored form */
 /* >                  of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV will not */
 /* >                  be modified. */
 /* >          = 'N':  The matrix will be copied to DLF, DF, and DUF */
 /* >                  and factored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          Specifies the form of the system of equations: */
 /* >          = 'N':  A * X = B     (No transpose) */
 /* >          = 'T':  A**T * X = B  (Transpose) */
 /* >          = 'C':  A**H * X = B  (Conjugate transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DL */
 /* > \verbatim */
 /* >          DL is COMPLEX*16 array, dimension (N-1) */
 /* >          The (n-1) subdiagonal elements of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] D */
 /* > \verbatim */
 /* >          D is COMPLEX*16 array, dimension (N) */
 /* >          The n diagonal elements of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU */
 /* > \verbatim */
 /* >          DU is COMPLEX*16 array, dimension (N-1) */
 /* >          The (n-1) superdiagonal elements of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DLF */
 /* > \verbatim */
 /* >          DLF is COMPLEX*16 array, dimension (N-1) */
 /* >          If FACT = 'F', then DLF is an input argument and on entry */
 /* >          contains the (n-1) multipliers that define the matrix L from */
 /* >          the LU factorization of A as computed by ZGTTRF. */
 /* > */
 /* >          If FACT = 'N', then DLF is an output argument and on exit */
 /* >          contains the (n-1) multipliers that define the matrix L from */
 /* >          the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DF */
 /* > \verbatim */
 /* >          DF is COMPLEX*16 array, dimension (N) */
 /* >          If FACT = 'F', then DF is an input argument and on entry */
 /* >          contains the n diagonal elements of the upper triangular */
 /* >          matrix U from the LU factorization of A. */
 /* > */
 /* >          If FACT = 'N', then DF is an output argument and on exit */
 /* >          contains the n diagonal elements of the upper triangular */
 /* >          matrix U from the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DUF */
 /* > \verbatim */
 /* >          DUF is COMPLEX*16 array, dimension (N-1) */
 /* >          If FACT = 'F', then DUF is an input argument and on entry */
 /* >          contains the (n-1) elements of the first superdiagonal of U. */
 /* > */
 /* >          If FACT = 'N', then DUF is an output argument and on exit */
 /* >          contains the (n-1) elements of the first superdiagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DU2 */
 /* > \verbatim */
 /* >          DU2 is COMPLEX*16 array, dimension (N-2) */
 /* >          If FACT = 'F', then DU2 is an input argument and on entry */
 /* >          contains the (n-2) elements of the second superdiagonal of */
 /* >          U. */
 /* > */
 /* >          If FACT = 'N', then DU2 is an output argument and on exit */
 /* >          contains the (n-2) elements of the second superdiagonal of */
 /* >          U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          If FACT = 'F', then IPIV is an input argument and on entry */
 /* >          contains the pivot indices from the LU factorization of A as */
 /* >          computed by ZGTTRF. */
 /* > */
 /* >          If FACT = 'N', then IPIV is an output argument and on exit */
 /* >          contains the pivot indices from the LU factorization of A; */
 /* >          row i of the matrix was interchanged with row IPIV(i). */
 /* >          IPIV(i) will always be either i or i+1; IPIV(i) = i indicates */
 /* >          a row interchange was not required. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,NRHS) */
 /* >          The N-by-NRHS right hand side matrix B. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] X */
 /* > \verbatim */
 /* >          X is COMPLEX*16 array, dimension (LDX,NRHS) */
 /* >          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDX */
 /* > \verbatim */
 /* >          LDX is INTEGER */
 /* >          The leading dimension of the array X.  LDX >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is DOUBLE PRECISION */
 /* >          The estimate of the reciprocal condition number of the matrix */
 /* >          A.  If RCOND is less than the machine precision (in */
 /* >          particular, if RCOND = 0), the matrix is singular to working */
 /* >          precision.  This condition is indicated by a return code of */
 /* >          INFO > 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] FERR */
 /* > \verbatim */
 /* >          FERR is DOUBLE PRECISION array, dimension (NRHS) */
 /* >          The estimated forward error bound for each solution vector */
 /* >          X(j) (the j-th column of the solution matrix X). */
 /* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
 /* >          is an estimated upper bound for the magnitude of the largest */
 /* >          element in (X(j) - XTRUE) divided by the magnitude of the */
 /* >          largest element in X(j).  The estimate is as reliable as */
 /* >          the estimate for RCOND, and is almost always a slight */
 /* >          overestimate of the true error. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] BERR */
 /* > \verbatim */
 /* >          BERR is DOUBLE PRECISION array, dimension (NRHS) */
 /* >          The componentwise relative backward error of each solution */
 /* >          vector X(j) (i.e., the smallest relative change in */
 /* >          any element of A or B that makes X(j) an exact solution). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, and i is */
 /* >                <= N:  U(i,i) is exactly zero.  The factorization */
 /* >                       has not been completed unless i = N, but the */
 /* >                       factor U is exactly singular, so the solution */
 /* >                       and error bounds could not be computed. */
 /* >                       RCOND = 0 is returned. */
 /* >                = N+1: U is nonsingular, but RCOND is less than machine */
 /* >                       precision, meaning that the matrix is singular */
 /* >                       to working precision.  Nevertheless, the */
 /* >                       solution and error bounds are computed because */
 /* >                       there are a number of situations where the */
 /* >                       computed solution can be more accurate than the */
 /* >                       value of RCOND would suggest. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16GTsolve */

 /*  ===================================================================== */
 /* Subroutine */ int zgtsvx_(char *fact, char *trans, integer *n, integer *
 	nrhs, doublecomplex *dl, doublecomplex *d__, doublecomplex *du, 
 	doublecomplex *dlf, doublecomplex *df, doublecomplex *duf, 
 	doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, 
 	doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, 
 	doublereal *berr, doublecomplex *work, doublereal *rwork, integer *
 	info)
 {
    /* System generated locals */
    integer b_dim1, b_offset, x_dim1, x_offset, i__1;

    /* Local variables */
    char norm[1];
    extern logical lsame_(char *, char *);
    doublereal anorm;
    extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *);
    extern doublereal dlamch_(char *);
    logical nofact;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern doublereal zlangt_(char *, integer *, doublecomplex *, 
 	    doublecomplex *, doublecomplex *);
    logical notran;
    extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *), 
 	    zgtcon_(char *, integer *, doublecomplex *, doublecomplex *, 
 	    doublecomplex *, doublecomplex *, integer *, doublereal *, 
 	    doublereal *, doublecomplex *, integer *), zgtrfs_(char *,
 	     integer *, integer *, doublecomplex *, doublecomplex *, 
 	    doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *
 	    , doublecomplex *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublereal *, doublereal *, 
 	    doublecomplex *, doublereal *, integer *), zgttrf_(
 	    integer *, doublecomplex *, doublecomplex *, doublecomplex *, 
 	    doublecomplex *, integer *, integer *), zgttrs_(char *, integer *,
 	     integer *, doublecomplex *, doublecomplex *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


    /* Parameter adjustments */
    --dl;
    --d__;
    --du;
    --dlf;
    --df;
    --duf;
    --du2;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --rwork;

    /* Function Body */
    *info = 0;
    nofact = lsame_(fact, "N");
    notran = lsame_(trans, "N");
    if (! nofact && ! lsame_(fact, "F")) {
 	*info = -1;
    } else if (! notran && ! lsame_(trans, "T") && ! 
 	    lsame_(trans, "C")) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*nrhs < 0) {
 	*info = -4;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -14;
    } else if (*ldx < f2cmax(1,*n)) {
 	*info = -16;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGTSVX", &i__1, (ftnlen)6);
 	return 0;
    }

    if (nofact) {

 /*        Compute the LU factorization of A. */

 	zcopy_(n, &d__[1], &c__1, &df[1], &c__1);
 	if (*n > 1) {
 	    i__1 = *n - 1;
 	    zcopy_(&i__1, &dl[1], &c__1, &dlf[1], &c__1);
 	    i__1 = *n - 1;
 	    zcopy_(&i__1, &du[1], &c__1, &duf[1], &c__1);
 	}
 	zgttrf_(n, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], info);

 /*        Return if INFO is non-zero. */

 	if (*info > 0) {
 	    *rcond = 0.;
 	    return 0;
 	}
    }

 /*     Compute the norm of the matrix A. */

    if (notran) {
 	*(unsigned char *)norm = '1';
    } else {
 	*(unsigned char *)norm = 'I';
    }
    anorm = zlangt_(norm, n, &dl[1], &d__[1], &du[1]);

 /*     Compute the reciprocal of the condition number of A. */

    zgtcon_(norm, n, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &anorm, 
 	    rcond, &work[1], info);

 /*     Compute the solution vectors X. */

    zlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx);
    zgttrs_(trans, n, nrhs, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &x[
 	    x_offset], ldx, info);

 /*     Use iterative refinement to improve the computed solutions and */
 /*     compute error bounds and backward error estimates for them. */

    zgtrfs_(trans, n, nrhs, &dl[1], &d__[1], &du[1], &dlf[1], &df[1], &duf[1],
 	     &du2[1], &ipiv[1], &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1]
 	    , &berr[1], &work[1], &rwork[1], info);

 /*     Set INFO = N+1 if the matrix is singular to working precision. */

    if (*rcond < dlamch_("Epsilon")) {
 	*info = *n + 1;
    }

    return 0;

 /*     End of ZGTSVX */

 } /* zgtsvx_ */

--- a/lapack-netlib/SRC/zgttrf.c
+++ b/lapack-netlib/SRC/zgttrf.c
@@ -0,0 +1,696 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b ZGTTRF */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGTTRF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgttrf.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgttrf.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgttrf.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) */

 /*       INTEGER            INFO, N */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX*16         D( * ), DL( * ), DU( * ), DU2( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGTTRF computes an LU factorization of a complex tridiagonal matrix A */
 /* > using elimination with partial pivoting and row interchanges. */
 /* > */
 /* > The factorization has the form */
 /* >    A = L * U */
 /* > where L is a product of permutation and unit lower bidiagonal */
 /* > matrices and U is upper triangular with nonzeros in only the main */
 /* > diagonal and first two superdiagonals. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DL */
 /* > \verbatim */
 /* >          DL is COMPLEX*16 array, dimension (N-1) */
 /* >          On entry, DL must contain the (n-1) sub-diagonal elements of */
 /* >          A. */
 /* > */
 /* >          On exit, DL is overwritten by the (n-1) multipliers that */
 /* >          define the matrix L from the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] D */
 /* > \verbatim */
 /* >          D is COMPLEX*16 array, dimension (N) */
 /* >          On entry, D must contain the diagonal elements of A. */
 /* > */
 /* >          On exit, D is overwritten by the n diagonal elements of the */
 /* >          upper triangular matrix U from the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DU */
 /* > \verbatim */
 /* >          DU is COMPLEX*16 array, dimension (N-1) */
 /* >          On entry, DU must contain the (n-1) super-diagonal elements */
 /* >          of A. */
 /* > */
 /* >          On exit, DU is overwritten by the (n-1) elements of the first */
 /* >          super-diagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] DU2 */
 /* > \verbatim */
 /* >          DU2 is COMPLEX*16 array, dimension (N-2) */
 /* >          On exit, DU2 is overwritten by the (n-2) elements of the */
 /* >          second super-diagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices; for 1 <= i <= n, row i of the matrix was */
 /* >          interchanged with row IPIV(i).  IPIV(i) will always be either */
 /* >          i or i+1; IPIV(i) = i indicates a row interchange was not */
 /* >          required. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -k, the k-th argument had an illegal value */
 /* >          > 0:  if INFO = k, U(k,k) is exactly zero. The factorization */
 /* >                has been completed, but the factor U is exactly */
 /* >                singular, and division by zero will occur if it is used */
 /* >                to solve a system of equations. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16GTcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int zgttrf_(integer *n, doublecomplex *dl, doublecomplex *
 	d__, doublecomplex *du, doublecomplex *du2, integer *ipiv, integer *
 	info)
 {
    /* System generated locals */
    integer i__1, i__2, i__3, i__4;
    doublereal d__1, d__2, d__3, d__4;
    doublecomplex z__1, z__2;

    /* Local variables */
    doublecomplex fact, temp;
    integer i__;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


    /* Parameter adjustments */
    --ipiv;
    --du2;
    --du;
    --d__;
    --dl;

    /* Function Body */
    *info = 0;
    if (*n < 0) {
 	*info = -1;
 	i__1 = -(*info);
 	xerbla_("ZGTTRF", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Initialize IPIV(i) = i and DU2(i) = 0 */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	ipiv[i__] = i__;
 /* L10: */
    }
    i__1 = *n - 2;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	i__2 = i__;
 	du2[i__2].r = 0., du2[i__2].i = 0.;
 /* L20: */
    }

    i__1 = *n - 2;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	i__2 = i__;
 	i__3 = i__;
 	if ((d__1 = d__[i__2].r, abs(d__1)) + (d__2 = d_imag(&d__[i__]), abs(
 		d__2)) >= (d__3 = dl[i__3].r, abs(d__3)) + (d__4 = d_imag(&dl[
 		i__]), abs(d__4))) {

 /*           No row interchange required, eliminate DL(I) */

 	    i__2 = i__;
 	    if ((d__1 = d__[i__2].r, abs(d__1)) + (d__2 = d_imag(&d__[i__]), 
 		    abs(d__2)) != 0.) {
 		z_div(&z__1, &dl[i__], &d__[i__]);
 		fact.r = z__1.r, fact.i = z__1.i;
 		i__2 = i__;
 		dl[i__2].r = fact.r, dl[i__2].i = fact.i;
 		i__2 = i__ + 1;
 		i__3 = i__ + 1;
 		i__4 = i__;
 		z__2.r = fact.r * du[i__4].r - fact.i * du[i__4].i, z__2.i = 
 			fact.r * du[i__4].i + fact.i * du[i__4].r;
 		z__1.r = d__[i__3].r - z__2.r, z__1.i = d__[i__3].i - z__2.i;
 		d__[i__2].r = z__1.r, d__[i__2].i = z__1.i;
 	    }
 	} else {

 /*           Interchange rows I and I+1, eliminate DL(I) */

 	    z_div(&z__1, &d__[i__], &dl[i__]);
 	    fact.r = z__1.r, fact.i = z__1.i;
 	    i__2 = i__;
 	    i__3 = i__;
 	    d__[i__2].r = dl[i__3].r, d__[i__2].i = dl[i__3].i;
 	    i__2 = i__;
 	    dl[i__2].r = fact.r, dl[i__2].i = fact.i;
 	    i__2 = i__;
 	    temp.r = du[i__2].r, temp.i = du[i__2].i;
 	    i__2 = i__;
 	    i__3 = i__ + 1;
 	    du[i__2].r = d__[i__3].r, du[i__2].i = d__[i__3].i;
 	    i__2 = i__ + 1;
 	    i__3 = i__ + 1;
 	    z__2.r = fact.r * d__[i__3].r - fact.i * d__[i__3].i, z__2.i = 
 		    fact.r * d__[i__3].i + fact.i * d__[i__3].r;
 	    z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i;
 	    d__[i__2].r = z__1.r, d__[i__2].i = z__1.i;
 	    i__2 = i__;
 	    i__3 = i__ + 1;
 	    du2[i__2].r = du[i__3].r, du2[i__2].i = du[i__3].i;
 	    i__2 = i__ + 1;
 	    z__2.r = -fact.r, z__2.i = -fact.i;
 	    i__3 = i__ + 1;
 	    z__1.r = z__2.r * du[i__3].r - z__2.i * du[i__3].i, z__1.i = 
 		    z__2.r * du[i__3].i + z__2.i * du[i__3].r;
 	    du[i__2].r = z__1.r, du[i__2].i = z__1.i;
 	    ipiv[i__] = i__ + 1;
 	}
 /* L30: */
    }
    if (*n > 1) {
 	i__ = *n - 1;
 	i__1 = i__;
 	i__2 = i__;
 	if ((d__1 = d__[i__1].r, abs(d__1)) + (d__2 = d_imag(&d__[i__]), abs(
 		d__2)) >= (d__3 = dl[i__2].r, abs(d__3)) + (d__4 = d_imag(&dl[
 		i__]), abs(d__4))) {
 	    i__1 = i__;
 	    if ((d__1 = d__[i__1].r, abs(d__1)) + (d__2 = d_imag(&d__[i__]), 
 		    abs(d__2)) != 0.) {
 		z_div(&z__1, &dl[i__], &d__[i__]);
 		fact.r = z__1.r, fact.i = z__1.i;
 		i__1 = i__;
 		dl[i__1].r = fact.r, dl[i__1].i = fact.i;
 		i__1 = i__ + 1;
 		i__2 = i__ + 1;
 		i__3 = i__;
 		z__2.r = fact.r * du[i__3].r - fact.i * du[i__3].i, z__2.i = 
 			fact.r * du[i__3].i + fact.i * du[i__3].r;
 		z__1.r = d__[i__2].r - z__2.r, z__1.i = d__[i__2].i - z__2.i;
 		d__[i__1].r = z__1.r, d__[i__1].i = z__1.i;
 	    }
 	} else {
 	    z_div(&z__1, &d__[i__], &dl[i__]);
 	    fact.r = z__1.r, fact.i = z__1.i;
 	    i__1 = i__;
 	    i__2 = i__;
 	    d__[i__1].r = dl[i__2].r, d__[i__1].i = dl[i__2].i;
 	    i__1 = i__;
 	    dl[i__1].r = fact.r, dl[i__1].i = fact.i;
 	    i__1 = i__;
 	    temp.r = du[i__1].r, temp.i = du[i__1].i;
 	    i__1 = i__;
 	    i__2 = i__ + 1;
 	    du[i__1].r = d__[i__2].r, du[i__1].i = d__[i__2].i;
 	    i__1 = i__ + 1;
 	    i__2 = i__ + 1;
 	    z__2.r = fact.r * d__[i__2].r - fact.i * d__[i__2].i, z__2.i = 
 		    fact.r * d__[i__2].i + fact.i * d__[i__2].r;
 	    z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i;
 	    d__[i__1].r = z__1.r, d__[i__1].i = z__1.i;
 	    ipiv[i__] = i__ + 1;
 	}
    }

 /*     Check for a zero on the diagonal of U. */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	i__2 = i__;
 	if ((d__1 = d__[i__2].r, abs(d__1)) + (d__2 = d_imag(&d__[i__]), abs(
 		d__2)) == 0.) {
 	    *info = i__;
 	    goto L50;
 	}
 /* L40: */
    }
 L50:

    return 0;

 /*     End of ZGTTRF */

 } /* zgttrf_ */

--- a/lapack-netlib/SRC/zgttrs.c
+++ b/lapack-netlib/SRC/zgttrs.c
@@ -0,0 +1,637 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b ZGTTRS */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZGTTRS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgttrs.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgttrs.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgttrs.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, */
 /*                          INFO ) */

 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, LDB, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX*16         B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZGTTRS solves one of the systems of equations */
 /* >    A * X = B,  A**T * X = B,  or  A**H * X = B, */
 /* > with a tridiagonal matrix A using the LU factorization computed */
 /* > by ZGTTRF. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          Specifies the form of the system of equations. */
 /* >          = 'N':  A * X = B     (No transpose) */
 /* >          = 'T':  A**T * X = B  (Transpose) */
 /* >          = 'C':  A**H * X = B  (Conjugate transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DL */
 /* > \verbatim */
 /* >          DL is COMPLEX*16 array, dimension (N-1) */
 /* >          The (n-1) multipliers that define the matrix L from the */
 /* >          LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] D */
 /* > \verbatim */
 /* >          D is COMPLEX*16 array, dimension (N) */
 /* >          The n diagonal elements of the upper triangular matrix U from */
 /* >          the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU */
 /* > \verbatim */
 /* >          DU is COMPLEX*16 array, dimension (N-1) */
 /* >          The (n-1) elements of the first super-diagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU2 */
 /* > \verbatim */
 /* >          DU2 is COMPLEX*16 array, dimension (N-2) */
 /* >          The (n-2) elements of the second super-diagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices; for 1 <= i <= n, row i of the matrix was */
 /* >          interchanged with row IPIV(i).  IPIV(i) will always be either */
 /* >          i or i+1; IPIV(i) = i indicates a row interchange was not */
 /* >          required. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,NRHS) */
 /* >          On entry, the matrix of right hand side vectors B. */
 /* >          On exit, B is overwritten by the solution vectors X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -k, the k-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16GTcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int zgttrs_(char *trans, integer *n, integer *nrhs, 
 	doublecomplex *dl, doublecomplex *d__, doublecomplex *du, 
 	doublecomplex *du2, integer *ipiv, doublecomplex *b, integer *ldb, 
 	integer *info)
 {
    /* System generated locals */
    integer b_dim1, b_offset, i__1, i__2, i__3;

    /* Local variables */
    integer j, jb, nb;
    extern /* Subroutine */ int zgtts2_(integer *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *
 	    , integer *, doublecomplex *, integer *), xerbla_(char *, integer 
 	    *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer itrans;
    logical notran;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


    /* Parameter adjustments */
    --dl;
    --d__;
    --du;
    --du2;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    notran = *(unsigned char *)trans == 'N' || *(unsigned char *)trans == 'n';
    if (! notran && ! (*(unsigned char *)trans == 'T' || *(unsigned char *)
 	    trans == 't') && ! (*(unsigned char *)trans == 'C' || *(unsigned 
 	    char *)trans == 'c')) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*ldb < f2cmax(*n,1)) {
 	*info = -10;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZGTTRS", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
 	return 0;
    }

 /*     Decode TRANS */

    if (notran) {
 	itrans = 0;
    } else if (*(unsigned char *)trans == 'T' || *(unsigned char *)trans == 
 	    't') {
 	itrans = 1;
    } else {
 	itrans = 2;
    }

 /*     Determine the number of right-hand sides to solve at a time. */

    if (*nrhs == 1) {
 	nb = 1;
    } else {
 /* Computing MAX */
 	i__1 = 1, i__2 = ilaenv_(&c__1, "ZGTTRS", trans, n, nrhs, &c_n1, &
 		c_n1, (ftnlen)6, (ftnlen)1);
 	nb = f2cmax(i__1,i__2);
    }

    if (nb >= *nrhs) {
 	zgtts2_(&itrans, n, nrhs, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[1], 
 		&b[b_offset], ldb);
    } else {
 	i__1 = *nrhs;
 	i__2 = nb;
 	for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
 /* Computing MIN */
 	    i__3 = *nrhs - j + 1;
 	    jb = f2cmin(i__3,nb);
 	    zgtts2_(&itrans, n, &jb, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[
 		    1], &b[j * b_dim1 + 1], ldb);
 /* L10: */
 	}
    }

 /*     End of ZGTTRS */

    return 0;
 } /* zgttrs_ */

--- a/lapack-netlib/SRC/zgtts2.c
+++ b/lapack-netlib/SRC/zgtts2.c
--- a/lapack-netlib/SRC/zhb2st_kernels.c
+++ b/lapack-netlib/SRC/zhb2st_kernels.c
@@ -0,0 +1,802 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b ZHB2ST_KERNELS */

 /*  @precisions fortran z -> s d c */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHB2ST_KERNELS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhb2st_
 kernels.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhb2st_
 kernels.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhb2st_
 kernels.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE  ZHB2ST_KERNELS( UPLO, WANTZ, TTYPE, */
 /*                                   ST, ED, SWEEP, N, NB, IB, */
 /*                                   A, LDA, V, TAU, LDVT, WORK) */

 /*       IMPLICIT NONE */

 /*       CHARACTER          UPLO */
 /*       LOGICAL            WANTZ */
 /*       INTEGER            TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT */
 /*       COMPLEX*16         A( LDA, * ), V( * ), */
 /*                          TAU( * ), WORK( * ) */

 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHB2ST_KERNELS is an internal routine used by the ZHETRD_HB2ST */
 /* > subroutine. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* > \endverbatim */
 /* > */
 /* > \param[in] WANTZ */
 /* > \verbatim */
 /* >          WANTZ is LOGICAL which indicate if Eigenvalue are requested or both */
 /* >          Eigenvalue/Eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TTYPE */
 /* > \verbatim */
 /* >          TTYPE is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ST */
 /* > \verbatim */
 /* >          ST is INTEGER */
 /* >          internal parameter for indices. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ED */
 /* > \verbatim */
 /* >          ED is INTEGER */
 /* >          internal parameter for indices. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] SWEEP */
 /* > \verbatim */
 /* >          SWEEP is INTEGER */
 /* >          internal parameter for indices. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER. The order of the matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NB */
 /* > \verbatim */
 /* >          NB is INTEGER. The size of the band. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IB */
 /* > \verbatim */
 /* >          IB is INTEGER. */
 /* > \endverbatim */
 /* > */
 /* > \param[in, out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array. A pointer to the matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER. The leading dimension of the matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] V */
 /* > \verbatim */
 /* >          V is COMPLEX*16 array, dimension 2*n if eigenvalues only are */
 /* >          requested or to be queried for vectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is COMPLEX*16 array, dimension (2*n). */
 /* >          The scalar factors of the Householder reflectors are stored */
 /* >          in this array. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDVT */
 /* > \verbatim */
 /* >          LDVT is INTEGER. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array. Workspace of size nb. */
 /* > \endverbatim */
 /* > */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  Implemented by Azzam Haidar. */
 /* > */
 /* >  All details are available on technical report, SC11, SC13 papers. */
 /* > */
 /* >  Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */
 /* >  Parallel reduction to condensed forms for symmetric eigenvalue problems */
 /* >  using aggregated fine-grained and memory-aware kernels. In Proceedings */
 /* >  of 2011 International Conference for High Performance Computing, */
 /* >  Networking, Storage and Analysis (SC '11), New York, NY, USA, */
 /* >  Article 8 , 11 pages. */
 /* >  http://doi.acm.org/10.1145/2063384.2063394 */
 /* > */
 /* >  A. Haidar, J. Kurzak, P. Luszczek, 2013. */
 /* >  An improved parallel singular value algorithm and its implementation */
 /* >  for multicore hardware, In Proceedings of 2013 International Conference */
 /* >  for High Performance Computing, Networking, Storage and Analysis (SC '13). */
 /* >  Denver, Colorado, USA, 2013. */
 /* >  Article 90, 12 pages. */
 /* >  http://doi.acm.org/10.1145/2503210.2503292 */
 /* > */
 /* >  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */
 /* >  A novel hybrid CPU-GPU generalized eigensolver for electronic structure */
 /* >  calculations based on fine-grained memory aware tasks. */
 /* >  International Journal of High Performance Computing Applications. */
 /* >  Volume 28 Issue 2, Pages 196-209, May 2014. */
 /* >  http://hpc.sagepub.com/content/28/2/196 */
 /* > */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zhb2st_kernels_(char *uplo, logical *wantz, integer *
 	ttype, integer *st, integer *ed, integer *sweep, integer *n, integer *
 	nb, integer *ib, doublecomplex *a, integer *lda, doublecomplex *v, 
 	doublecomplex *tau, integer *ldvt, doublecomplex *work)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    doublecomplex z__1;

    /* Local variables */
    doublecomplex ctmp;
    integer dpos, vpos, i__;
    extern logical lsame_(char *, char *);
    logical upper;
    integer j1, j2, lm, ln, ajeter;
    extern /* Subroutine */ int zlarfg_(integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *);
    integer ofdpos;
    extern /* Subroutine */ int zlarfx_(char *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, doublecomplex *, integer *, 
 	    doublecomplex *), zlarfy_(char *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
 	    integer *, doublecomplex *);
    integer taupos;



 /*  -- LAPACK computational routine (version 3.7.1) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2017 */


 /*  ===================================================================== */


    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --v;
    --tau;
    --work;

    /* Function Body */
    ajeter = *ib + *ldvt;
    upper = lsame_(uplo, "U");
    if (upper) {
 	dpos = (*nb << 1) + 1;
 	ofdpos = *nb << 1;
    } else {
 	dpos = 1;
 	ofdpos = 2;
    }

 /*     Upper case */

    if (upper) {

 	if (*wantz) {
 	    vpos = (*sweep - 1) % 2 * *n + *st;
 	    taupos = (*sweep - 1) % 2 * *n + *st;
 	} else {
 	    vpos = (*sweep - 1) % 2 * *n + *st;
 	    taupos = (*sweep - 1) % 2 * *n + *st;
 	}

 	if (*ttype == 1) {
 	    lm = *ed - *st + 1;

 	    i__1 = vpos;
 	    v[i__1].r = 1., v[i__1].i = 0.;
 	    i__1 = lm - 1;
 	    for (i__ = 1; i__ <= i__1; ++i__) {
 		i__2 = vpos + i__;
 		d_cnjg(&z__1, &a[ofdpos - i__ + (*st + i__) * a_dim1]);
 		v[i__2].r = z__1.r, v[i__2].i = z__1.i;
 		i__2 = ofdpos - i__ + (*st + i__) * a_dim1;
 		a[i__2].r = 0., a[i__2].i = 0.;
 /* L10: */
 	    }
 	    d_cnjg(&z__1, &a[ofdpos + *st * a_dim1]);
 	    ctmp.r = z__1.r, ctmp.i = z__1.i;
 	    zlarfg_(&lm, &ctmp, &v[vpos + 1], &c__1, &tau[taupos]);
 	    i__1 = ofdpos + *st * a_dim1;
 	    a[i__1].r = ctmp.r, a[i__1].i = ctmp.i;

 	    lm = *ed - *st + 1;
 	    d_cnjg(&z__1, &tau[taupos]);
 	    i__1 = *lda - 1;
 	    zlarfy_(uplo, &lm, &v[vpos], &c__1, &z__1, &a[dpos + *st * a_dim1]
 		    , &i__1, &work[1]);
 	}

 	if (*ttype == 3) {

 	    lm = *ed - *st + 1;
 	    d_cnjg(&z__1, &tau[taupos]);
 	    i__1 = *lda - 1;
 	    zlarfy_(uplo, &lm, &v[vpos], &c__1, &z__1, &a[dpos + *st * a_dim1]
 		    , &i__1, &work[1]);
 	}

 	if (*ttype == 2) {
 	    j1 = *ed + 1;
 /* Computing MIN */
 	    i__1 = *ed + *nb;
 	    j2 = f2cmin(i__1,*n);
 	    ln = *ed - *st + 1;
 	    lm = j2 - j1 + 1;
 	    if (lm > 0) {
 		d_cnjg(&z__1, &tau[taupos]);
 		i__1 = *lda - 1;
 		zlarfx_("Left", &ln, &lm, &v[vpos], &z__1, &a[dpos - *nb + j1 
 			* a_dim1], &i__1, &work[1]);

 		if (*wantz) {
 		    vpos = (*sweep - 1) % 2 * *n + j1;
 		    taupos = (*sweep - 1) % 2 * *n + j1;
 		} else {
 		    vpos = (*sweep - 1) % 2 * *n + j1;
 		    taupos = (*sweep - 1) % 2 * *n + j1;
 		}

 		i__1 = vpos;
 		v[i__1].r = 1., v[i__1].i = 0.;
 		i__1 = lm - 1;
 		for (i__ = 1; i__ <= i__1; ++i__) {
 		    i__2 = vpos + i__;
 		    d_cnjg(&z__1, &a[dpos - *nb - i__ + (j1 + i__) * a_dim1]);
 		    v[i__2].r = z__1.r, v[i__2].i = z__1.i;
 		    i__2 = dpos - *nb - i__ + (j1 + i__) * a_dim1;
 		    a[i__2].r = 0., a[i__2].i = 0.;
 /* L30: */
 		}
 		d_cnjg(&z__1, &a[dpos - *nb + j1 * a_dim1]);
 		ctmp.r = z__1.r, ctmp.i = z__1.i;
 		zlarfg_(&lm, &ctmp, &v[vpos + 1], &c__1, &tau[taupos]);
 		i__1 = dpos - *nb + j1 * a_dim1;
 		a[i__1].r = ctmp.r, a[i__1].i = ctmp.i;

 		i__1 = ln - 1;
 		i__2 = *lda - 1;
 		zlarfx_("Right", &i__1, &lm, &v[vpos], &tau[taupos], &a[dpos 
 			- *nb + 1 + j1 * a_dim1], &i__2, &work[1]);
 	    }
 	}

 /*     Lower case */

    } else {

 	if (*wantz) {
 	    vpos = (*sweep - 1) % 2 * *n + *st;
 	    taupos = (*sweep - 1) % 2 * *n + *st;
 	} else {
 	    vpos = (*sweep - 1) % 2 * *n + *st;
 	    taupos = (*sweep - 1) % 2 * *n + *st;
 	}

 	if (*ttype == 1) {
 	    lm = *ed - *st + 1;

 	    i__1 = vpos;
 	    v[i__1].r = 1., v[i__1].i = 0.;
 	    i__1 = lm - 1;
 	    for (i__ = 1; i__ <= i__1; ++i__) {
 		i__2 = vpos + i__;
 		i__3 = ofdpos + i__ + (*st - 1) * a_dim1;
 		v[i__2].r = a[i__3].r, v[i__2].i = a[i__3].i;
 		i__2 = ofdpos + i__ + (*st - 1) * a_dim1;
 		a[i__2].r = 0., a[i__2].i = 0.;
 /* L20: */
 	    }
 	    zlarfg_(&lm, &a[ofdpos + (*st - 1) * a_dim1], &v[vpos + 1], &c__1,
 		     &tau[taupos]);

 	    lm = *ed - *st + 1;

 	    d_cnjg(&z__1, &tau[taupos]);
 	    i__1 = *lda - 1;
 	    zlarfy_(uplo, &lm, &v[vpos], &c__1, &z__1, &a[dpos + *st * a_dim1]
 		    , &i__1, &work[1]);
 	}

 	if (*ttype == 3) {
 	    lm = *ed - *st + 1;

 	    d_cnjg(&z__1, &tau[taupos]);
 	    i__1 = *lda - 1;
 	    zlarfy_(uplo, &lm, &v[vpos], &c__1, &z__1, &a[dpos + *st * a_dim1]
 		    , &i__1, &work[1]);
 	}

 	if (*ttype == 2) {
 	    j1 = *ed + 1;
 /* Computing MIN */
 	    i__1 = *ed + *nb;
 	    j2 = f2cmin(i__1,*n);
 	    ln = *ed - *st + 1;
 	    lm = j2 - j1 + 1;

 	    if (lm > 0) {
 		i__1 = *lda - 1;
 		zlarfx_("Right", &lm, &ln, &v[vpos], &tau[taupos], &a[dpos + *
 			nb + *st * a_dim1], &i__1, &work[1]);

 		if (*wantz) {
 		    vpos = (*sweep - 1) % 2 * *n + j1;
 		    taupos = (*sweep - 1) % 2 * *n + j1;
 		} else {
 		    vpos = (*sweep - 1) % 2 * *n + j1;
 		    taupos = (*sweep - 1) % 2 * *n + j1;
 		}

 		i__1 = vpos;
 		v[i__1].r = 1., v[i__1].i = 0.;
 		i__1 = lm - 1;
 		for (i__ = 1; i__ <= i__1; ++i__) {
 		    i__2 = vpos + i__;
 		    i__3 = dpos + *nb + i__ + *st * a_dim1;
 		    v[i__2].r = a[i__3].r, v[i__2].i = a[i__3].i;
 		    i__2 = dpos + *nb + i__ + *st * a_dim1;
 		    a[i__2].r = 0., a[i__2].i = 0.;
 /* L40: */
 		}
 		zlarfg_(&lm, &a[dpos + *nb + *st * a_dim1], &v[vpos + 1], &
 			c__1, &tau[taupos]);

 		i__1 = ln - 1;
 		d_cnjg(&z__1, &tau[taupos]);
 		i__2 = *lda - 1;
 		zlarfx_("Left", &lm, &i__1, &v[vpos], &z__1, &a[dpos + *nb - 
 			1 + (*st + 1) * a_dim1], &i__2, &work[1]);
 	    }
 	}
    }

    return 0;

 /*     END OF ZHB2ST_KERNELS */

 } /* zhb2st_kernels__ */

--- a/lapack-netlib/SRC/zhbev.c
+++ b/lapack-netlib/SRC/zhbev.c
@@ -0,0 +1,715 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublereal c_b11 = 1.;
 static integer c__1 = 1;

 /* > \brief <b> ZHBEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER m
 atrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHBEV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhbev.f
 "> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhbev.f
 "> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhbev.f
 "> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, */
 /*                         RWORK, INFO ) */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, KD, LDAB, LDZ, N */
 /*       DOUBLE PRECISION   RWORK( * ), W( * ) */
 /*       COMPLEX*16         AB( LDAB, * ), WORK( * ), Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHBEV computes all the eigenvalues and, optionally, eigenvectors of */
 /* > a complex Hermitian band matrix A. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KD */
 /* > \verbatim */
 /* >          KD is INTEGER */
 /* >          The number of superdiagonals of the matrix A if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'.  KD >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AB */
 /* > \verbatim */
 /* >          AB is COMPLEX*16 array, dimension (LDAB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix A, stored in the first KD+1 rows of the array.  The */
 /* >          j-th column of A is stored in the j-th column of the array AB */
 /* >          as follows: */
 /* >          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */
 /* >          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=f2cmin(n,j+kd). */
 /* > */
 /* >          On exit, AB is overwritten by values generated during the */
 /* >          reduction to tridiagonal form.  If UPLO = 'U', the first */
 /* >          superdiagonal and the diagonal of the tridiagonal matrix T */
 /* >          are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */
 /* >          the diagonal and first subdiagonal of T are returned in the */
 /* >          first two rows of AB. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KD + 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX*16 array, dimension (LDZ, N) */
 /* >          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */
 /* >          eigenvectors of the matrix A, with the i-th column of Z */
 /* >          holding the eigenvector associated with W(i). */
 /* >          If JOBZ = 'N', then Z is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z.  LDZ >= 1, and if */
 /* >          JOBZ = 'V', LDZ >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, dimension (f2cmax(1,3*N-2)) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          > 0:  if INFO = i, the algorithm failed to converge; i */
 /* >                off-diagonal elements of an intermediate tridiagonal */
 /* >                form did not converge to zero. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16OTHEReigen */

 /*  ===================================================================== */
 /* Subroutine */ int zhbev_(char *jobz, char *uplo, integer *n, integer *kd, 
 	doublecomplex *ab, integer *ldab, doublereal *w, doublecomplex *z__, 
 	integer *ldz, doublecomplex *work, doublereal *rwork, integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, z_dim1, z_offset, i__1;
    doublereal d__1;

    /* Local variables */
    integer inde;
    doublereal anrm;
    integer imax;
    doublereal rmin, rmax;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
 	    integer *);
    doublereal sigma;
    extern logical lsame_(char *, char *);
    integer iinfo;
    logical lower, wantz;
    extern doublereal dlamch_(char *);
    integer iscale;
    doublereal safmin;
    extern doublereal zlanhb_(char *, char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublereal *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    doublereal bignum;
    extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *,
 	     integer *), zlascl_(char *, integer *, integer *, doublereal *, 
 	    doublereal *, integer *, integer *, doublecomplex *, integer *, 
 	    integer *), zhbtrd_(char *, char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublereal *, doublereal *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *);
    integer indrwk;
    doublereal smlnum;
    extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, 
 	    doublereal *, doublecomplex *, integer *, doublereal *, integer *);
    doublereal eps;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    lower = lsame_(uplo, "L");

    *info = 0;
    if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -1;
    } else if (! (lower || lsame_(uplo, "U"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*kd < 0) {
 	*info = -4;
    } else if (*ldab < *kd + 1) {
 	*info = -6;
    } else if (*ldz < 1 || wantz && *ldz < *n) {
 	*info = -9;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHBEV ", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

    if (*n == 1) {
 	if (lower) {
 	    i__1 = ab_dim1 + 1;
 	    w[1] = ab[i__1].r;
 	} else {
 	    i__1 = *kd + 1 + ab_dim1;
 	    w[1] = ab[i__1].r;
 	}
 	if (wantz) {
 	    i__1 = z_dim1 + 1;
 	    z__[i__1].r = 1., z__[i__1].i = 0.;
 	}
 	return 0;
    }

 /*     Get machine constants. */

    safmin = dlamch_("Safe minimum");
    eps = dlamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1. / smlnum;
    rmin = sqrt(smlnum);
    rmax = sqrt(bignum);

 /*     Scale matrix to allowable range, if necessary. */

    anrm = zlanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]);
    iscale = 0;
    if (anrm > 0. && anrm < rmin) {
 	iscale = 1;
 	sigma = rmin / anrm;
    } else if (anrm > rmax) {
 	iscale = 1;
 	sigma = rmax / anrm;
    }
    if (iscale == 1) {
 	if (lower) {
 	    zlascl_("B", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, 
 		    info);
 	} else {
 	    zlascl_("Q", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, 
 		    info);
 	}
    }

 /*     Call ZHBTRD to reduce Hermitian band matrix to tridiagonal form. */

    inde = 1;
    zhbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &rwork[inde], &
 	    z__[z_offset], ldz, &work[1], &iinfo);

 /*     For eigenvalues only, call DSTERF.  For eigenvectors, call ZSTEQR. */

    if (! wantz) {
 	dsterf_(n, &w[1], &rwork[inde], info);
    } else {
 	indrwk = inde + *n;
 	zsteqr_(jobz, n, &w[1], &rwork[inde], &z__[z_offset], ldz, &rwork[
 		indrwk], info);
    }

 /*     If matrix was scaled, then rescale eigenvalues appropriately. */

    if (iscale == 1) {
 	if (*info == 0) {
 	    imax = *n;
 	} else {
 	    imax = *info - 1;
 	}
 	d__1 = 1. / sigma;
 	dscal_(&imax, &d__1, &w[1], &c__1);
    }

    return 0;

 /*     End of ZHBEV */

 } /* zhbev_ */

--- a/lapack-netlib/SRC/zhbev_2stage.c
+++ b/lapack-netlib/SRC/zhbev_2stage.c
@@ -0,0 +1,821 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__2 = 2;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__4 = 4;
 static doublereal c_b21 = 1.;
 static integer c__1 = 1;

 /* > \brief <b> ZHBEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for 
 OTHER matrices</b> */

 /*  @precisions fortran z -> s d c */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHBEV_2STAGE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhbev_2
 stage.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhbev_2
 stage.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhbev_2
 stage.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, */
 /*                                WORK, LWORK, RWORK, INFO ) */

 /*       IMPLICIT NONE */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, KD, LDAB, LDZ, N, LWORK */
 /*       DOUBLE PRECISION   RWORK( * ), W( * ) */
 /*       COMPLEX*16         AB( LDAB, * ), WORK( * ), Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHBEV_2STAGE computes all the eigenvalues and, optionally, eigenvectors of */
 /* > a complex Hermitian band matrix A using the 2stage technique for */
 /* > the reduction to tridiagonal. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* >                  Not available in this release. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KD */
 /* > \verbatim */
 /* >          KD is INTEGER */
 /* >          The number of superdiagonals of the matrix A if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'.  KD >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AB */
 /* > \verbatim */
 /* >          AB is COMPLEX*16 array, dimension (LDAB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix A, stored in the first KD+1 rows of the array.  The */
 /* >          j-th column of A is stored in the j-th column of the array AB */
 /* >          as follows: */
 /* >          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */
 /* >          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=f2cmin(n,j+kd). */
 /* > */
 /* >          On exit, AB is overwritten by values generated during the */
 /* >          reduction to tridiagonal form.  If UPLO = 'U', the first */
 /* >          superdiagonal and the diagonal of the tridiagonal matrix T */
 /* >          are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */
 /* >          the diagonal and first subdiagonal of T are returned in the */
 /* >          first two rows of AB. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KD + 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX*16 array, dimension (LDZ, N) */
 /* >          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */
 /* >          eigenvectors of the matrix A, with the i-th column of Z */
 /* >          holding the eigenvector associated with W(i). */
 /* >          If JOBZ = 'N', then Z is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z.  LDZ >= 1, and if */
 /* >          JOBZ = 'V', LDZ >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension LWORK */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK. LWORK >= 1, when N <= 1; */
 /* >          otherwise */
 /* >          If JOBZ = 'N' and N > 1, LWORK must be queried. */
 /* >                                   LWORK = MAX(1, dimension) where */
 /* >                                   dimension = (2KD+1)*N + KD*NTHREADS */
 /* >                                   where KD is the size of the band. */
 /* >                                   NTHREADS is the number of threads used when */
 /* >                                   openMP compilation is enabled, otherwise =1. */
 /* >          If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal sizes of the WORK, RWORK and */
 /* >          IWORK arrays, returns these values as the first entries of */
 /* >          the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, dimension (f2cmax(1,3*N-2)) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          > 0:  if INFO = i, the algorithm failed to converge; i */
 /* >                off-diagonal elements of an intermediate tridiagonal */
 /* >                form did not converge to zero. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complex16OTHEReigen */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  All details about the 2stage techniques are available in: */
 /* > */
 /* >  Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */
 /* >  Parallel reduction to condensed forms for symmetric eigenvalue problems */
 /* >  using aggregated fine-grained and memory-aware kernels. In Proceedings */
 /* >  of 2011 International Conference for High Performance Computing, */
 /* >  Networking, Storage and Analysis (SC '11), New York, NY, USA, */
 /* >  Article 8 , 11 pages. */
 /* >  http://doi.acm.org/10.1145/2063384.2063394 */
 /* > */
 /* >  A. Haidar, J. Kurzak, P. Luszczek, 2013. */
 /* >  An improved parallel singular value algorithm and its implementation */
 /* >  for multicore hardware, In Proceedings of 2013 International Conference */
 /* >  for High Performance Computing, Networking, Storage and Analysis (SC '13). */
 /* >  Denver, Colorado, USA, 2013. */
 /* >  Article 90, 12 pages. */
 /* >  http://doi.acm.org/10.1145/2503210.2503292 */
 /* > */
 /* >  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */
 /* >  A novel hybrid CPU-GPU generalized eigensolver for electronic structure */
 /* >  calculations based on fine-grained memory aware tasks. */
 /* >  International Journal of High Performance Computing Applications. */
 /* >  Volume 28 Issue 2, Pages 196-209, May 2014. */
 /* >  http://hpc.sagepub.com/content/28/2/196 */
 /* > */
 /* > \endverbatim */

 /*  ===================================================================== */
 /* Subroutine */ int zhbev_2stage_(char *jobz, char *uplo, integer *n, 
 	integer *kd, doublecomplex *ab, integer *ldab, doublereal *w, 
 	doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork,
 	 doublereal *rwork, integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, z_dim1, z_offset, i__1;
    doublereal d__1;

    /* Local variables */
    integer inde;
    extern integer ilaenv2stage_(integer *, char *, char *, integer *, 
 	    integer *, integer *, integer *);
    doublereal anrm;
    integer imax;
    extern /* Subroutine */ int zhetrd_hb2st_(char *, char *, char *, 
 	    integer *, integer *, doublecomplex *, integer *, doublereal *, 
 	    doublereal *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, integer *);
    doublereal rmin, rmax;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
 	    integer *);
    doublereal sigma;
    extern logical lsame_(char *, char *);
    integer iinfo, lhtrd, lwmin;
    logical lower;
    integer lwtrd;
    logical wantz;
    integer ib;
    extern doublereal dlamch_(char *);
    integer iscale;
    doublereal safmin;
    extern doublereal zlanhb_(char *, char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublereal *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    doublereal bignum;
    extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *,
 	     integer *), zlascl_(char *, integer *, integer *, doublereal *, 
 	    doublereal *, integer *, integer *, doublecomplex *, integer *, 
 	    integer *);
    integer indwrk, indrwk, llwork;
    doublereal smlnum;
    logical lquery;
    extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, 
 	    doublereal *, doublecomplex *, integer *, doublereal *, integer *);
    doublereal eps;
    integer indhous;



 /*  -- LAPACK driver routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    lower = lsame_(uplo, "L");
    lquery = *lwork == -1;

    *info = 0;
    if (! lsame_(jobz, "N")) {
 	*info = -1;
    } else if (! (lower || lsame_(uplo, "U"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*kd < 0) {
 	*info = -4;
    } else if (*ldab < *kd + 1) {
 	*info = -6;
    } else if (*ldz < 1 || wantz && *ldz < *n) {
 	*info = -9;
    }

    if (*info == 0) {
 	if (*n <= 1) {
 	    lwmin = 1;
 	    work[1].r = (doublereal) lwmin, work[1].i = 0.;
 	} else {
 	    ib = ilaenv2stage_(&c__2, "ZHETRD_HB2ST", jobz, n, kd, &c_n1, &
 		    c_n1);
 	    lhtrd = ilaenv2stage_(&c__3, "ZHETRD_HB2ST", jobz, n, kd, &ib, &
 		    c_n1);
 	    lwtrd = ilaenv2stage_(&c__4, "ZHETRD_HB2ST", jobz, n, kd, &ib, &
 		    c_n1);
 	    lwmin = lhtrd + lwtrd;
 	    work[1].r = (doublereal) lwmin, work[1].i = 0.;
 	}

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -11;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHBEV_2STAGE ", &i__1, (ftnlen)13);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

    if (*n == 1) {
 	if (lower) {
 	    i__1 = ab_dim1 + 1;
 	    w[1] = ab[i__1].r;
 	} else {
 	    i__1 = *kd + 1 + ab_dim1;
 	    w[1] = ab[i__1].r;
 	}
 	if (wantz) {
 	    i__1 = z_dim1 + 1;
 	    z__[i__1].r = 1., z__[i__1].i = 0.;
 	}
 	return 0;
    }

 /*     Get machine constants. */

    safmin = dlamch_("Safe minimum");
    eps = dlamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1. / smlnum;
    rmin = sqrt(smlnum);
    rmax = sqrt(bignum);

 /*     Scale matrix to allowable range, if necessary. */

    anrm = zlanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]);
    iscale = 0;
    if (anrm > 0. && anrm < rmin) {
 	iscale = 1;
 	sigma = rmin / anrm;
    } else if (anrm > rmax) {
 	iscale = 1;
 	sigma = rmax / anrm;
    }
    if (iscale == 1) {
 	if (lower) {
 	    zlascl_("B", kd, kd, &c_b21, &sigma, n, n, &ab[ab_offset], ldab, 
 		    info);
 	} else {
 	    zlascl_("Q", kd, kd, &c_b21, &sigma, n, n, &ab[ab_offset], ldab, 
 		    info);
 	}
    }

 /*     Call ZHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. */

    inde = 1;
    indhous = 1;
    indwrk = indhous + lhtrd;
    llwork = *lwork - indwrk + 1;

    zhetrd_hb2st_("N", jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &
 	    rwork[inde], &work[indhous], &lhtrd, &work[indwrk], &llwork, &
 	    iinfo);

 /*     For eigenvalues only, call DSTERF.  For eigenvectors, call ZSTEQR. */

    if (! wantz) {
 	dsterf_(n, &w[1], &rwork[inde], info);
    } else {
 	indrwk = inde + *n;
 	zsteqr_(jobz, n, &w[1], &rwork[inde], &z__[z_offset], ldz, &rwork[
 		indrwk], info);
    }

 /*     If matrix was scaled, then rescale eigenvalues appropriately. */

    if (iscale == 1) {
 	if (*info == 0) {
 	    imax = *n;
 	} else {
 	    imax = *info - 1;
 	}
 	d__1 = 1. / sigma;
 	dscal_(&imax, &d__1, &w[1], &c__1);
    }

 /*     Set WORK(1) to optimal workspace size. */

    work[1].r = (doublereal) lwmin, work[1].i = 0.;

    return 0;

 /*     End of ZHBEV_2STAGE */

 } /* zhbev_2stage__ */

--- a/lapack-netlib/SRC/zhbevd.c
+++ b/lapack-netlib/SRC/zhbevd.c
@@ -0,0 +1,836 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {0.,0.};
 static doublecomplex c_b2 = {1.,0.};
 static doublereal c_b13 = 1.;
 static integer c__1 = 1;

 /* > \brief <b> ZHBEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER 
 matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHBEVD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhbevd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhbevd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhbevd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, */
 /*                          LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N */
 /*       INTEGER            IWORK( * ) */
 /*       DOUBLE PRECISION   RWORK( * ), W( * ) */
 /*       COMPLEX*16         AB( LDAB, * ), WORK( * ), Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHBEVD computes all the eigenvalues and, optionally, eigenvectors of */
 /* > a complex Hermitian band matrix A.  If eigenvectors are desired, it */
 /* > uses a divide and conquer algorithm. */
 /* > */
 /* > The divide and conquer algorithm makes very mild assumptions about */
 /* > floating point arithmetic. It will work on machines with a guard */
 /* > digit in add/subtract, or on those binary machines without guard */
 /* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
 /* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */
 /* > without guard digits, but we know of none. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KD */
 /* > \verbatim */
 /* >          KD is INTEGER */
 /* >          The number of superdiagonals of the matrix A if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'.  KD >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AB */
 /* > \verbatim */
 /* >          AB is COMPLEX*16 array, dimension (LDAB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix A, stored in the first KD+1 rows of the array.  The */
 /* >          j-th column of A is stored in the j-th column of the array AB */
 /* >          as follows: */
 /* >          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */
 /* >          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=f2cmin(n,j+kd). */
 /* > */
 /* >          On exit, AB is overwritten by values generated during the */
 /* >          reduction to tridiagonal form.  If UPLO = 'U', the first */
 /* >          superdiagonal and the diagonal of the tridiagonal matrix T */
 /* >          are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */
 /* >          the diagonal and first subdiagonal of T are returned in the */
 /* >          first two rows of AB. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KD + 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX*16 array, dimension (LDZ, N) */
 /* >          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */
 /* >          eigenvectors of the matrix A, with the i-th column of Z */
 /* >          holding the eigenvector associated with W(i). */
 /* >          If JOBZ = 'N', then Z is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z.  LDZ >= 1, and if */
 /* >          JOBZ = 'V', LDZ >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If N <= 1,               LWORK must be at least 1. */
 /* >          If JOBZ = 'N' and N > 1, LWORK must be at least N. */
 /* >          If JOBZ = 'V' and N > 1, LWORK must be at least 2*N**2. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal sizes of the WORK, RWORK and */
 /* >          IWORK arrays, returns these values as the first entries of */
 /* >          the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, */
 /* >                                         dimension (LRWORK) */
 /* >          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LRWORK */
 /* > \verbatim */
 /* >          LRWORK is INTEGER */
 /* >          The dimension of array RWORK. */
 /* >          If N <= 1,               LRWORK must be at least 1. */
 /* >          If JOBZ = 'N' and N > 1, LRWORK must be at least N. */
 /* >          If JOBZ = 'V' and N > 1, LRWORK must be at least */
 /* >                        1 + 5*N + 2*N**2. */
 /* > */
 /* >          If LRWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */
 /* >          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LIWORK */
 /* > \verbatim */
 /* >          LIWORK is INTEGER */
 /* >          The dimension of array IWORK. */
 /* >          If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. */
 /* >          If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . */
 /* > */
 /* >          If LIWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          > 0:  if INFO = i, the algorithm failed to converge; i */
 /* >                off-diagonal elements of an intermediate tridiagonal */
 /* >                form did not converge to zero. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16OTHEReigen */

 /*  ===================================================================== */
 /* Subroutine */ int zhbevd_(char *jobz, char *uplo, integer *n, integer *kd, 
 	doublecomplex *ab, integer *ldab, doublereal *w, doublecomplex *z__, 
 	integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork, 
 	integer *lrwork, integer *iwork, integer *liwork, integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, z_dim1, z_offset, i__1;
    doublereal d__1;

    /* Local variables */
    integer inde;
    doublereal anrm;
    integer imax;
    doublereal rmin, rmax;
    integer llwk2;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
 	    integer *);
    doublereal sigma;
    extern logical lsame_(char *, char *);
    integer iinfo;
    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
 	    integer *);
    integer lwmin;
    logical lower;
    integer llrwk;
    logical wantz;
    integer indwk2;
    extern doublereal dlamch_(char *);
    integer iscale;
    doublereal safmin;
    extern doublereal zlanhb_(char *, char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublereal *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    doublereal bignum;
    extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *,
 	     integer *), zlascl_(char *, integer *, integer *, doublereal *, 
 	    doublereal *, integer *, integer *, doublecomplex *, integer *, 
 	    integer *), zstedc_(char *, integer *, doublereal *, 
 	    doublereal *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, doublereal *, integer *, integer *, integer *, integer 
 	    *), zhbtrd_(char *, char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublereal *, doublereal *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *);
    integer indwrk, liwmin;
    extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *);
    integer lrwmin;
    doublereal smlnum;
    logical lquery;
    doublereal eps;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    lower = lsame_(uplo, "L");
    lquery = *lwork == -1 || *liwork == -1 || *lrwork == -1;

    *info = 0;
    if (*n <= 1) {
 	lwmin = 1;
 	lrwmin = 1;
 	liwmin = 1;
    } else {
 	if (wantz) {
 /* Computing 2nd power */
 	    i__1 = *n;
 	    lwmin = i__1 * i__1 << 1;
 /* Computing 2nd power */
 	    i__1 = *n;
 	    lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1);
 	    liwmin = *n * 5 + 3;
 	} else {
 	    lwmin = *n;
 	    lrwmin = *n;
 	    liwmin = 1;
 	}
    }
    if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -1;
    } else if (! (lower || lsame_(uplo, "U"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*kd < 0) {
 	*info = -4;
    } else if (*ldab < *kd + 1) {
 	*info = -6;
    } else if (*ldz < 1 || wantz && *ldz < *n) {
 	*info = -9;
    }

    if (*info == 0) {
 	work[1].r = (doublereal) lwmin, work[1].i = 0.;
 	rwork[1] = (doublereal) lrwmin;
 	iwork[1] = liwmin;

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -11;
 	} else if (*lrwork < lrwmin && ! lquery) {
 	    *info = -13;
 	} else if (*liwork < liwmin && ! lquery) {
 	    *info = -15;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHBEVD", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

    if (*n == 1) {
 	i__1 = ab_dim1 + 1;
 	w[1] = ab[i__1].r;
 	if (wantz) {
 	    i__1 = z_dim1 + 1;
 	    z__[i__1].r = 1., z__[i__1].i = 0.;
 	}
 	return 0;
    }

 /*     Get machine constants. */

    safmin = dlamch_("Safe minimum");
    eps = dlamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1. / smlnum;
    rmin = sqrt(smlnum);
    rmax = sqrt(bignum);

 /*     Scale matrix to allowable range, if necessary. */

    anrm = zlanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]);
    iscale = 0;
    if (anrm > 0. && anrm < rmin) {
 	iscale = 1;
 	sigma = rmin / anrm;
    } else if (anrm > rmax) {
 	iscale = 1;
 	sigma = rmax / anrm;
    }
    if (iscale == 1) {
 	if (lower) {
 	    zlascl_("B", kd, kd, &c_b13, &sigma, n, n, &ab[ab_offset], ldab, 
 		    info);
 	} else {
 	    zlascl_("Q", kd, kd, &c_b13, &sigma, n, n, &ab[ab_offset], ldab, 
 		    info);
 	}
    }

 /*     Call ZHBTRD to reduce Hermitian band matrix to tridiagonal form. */

    inde = 1;
    indwrk = inde + *n;
    indwk2 = *n * *n + 1;
    llwk2 = *lwork - indwk2 + 1;
    llrwk = *lrwork - indwrk + 1;
    zhbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &rwork[inde], &
 	    z__[z_offset], ldz, &work[1], &iinfo);

 /*     For eigenvalues only, call DSTERF.  For eigenvectors, call ZSTEDC. */

    if (! wantz) {
 	dsterf_(n, &w[1], &rwork[inde], info);
    } else {
 	zstedc_("I", n, &w[1], &rwork[inde], &work[1], n, &work[indwk2], &
 		llwk2, &rwork[indwrk], &llrwk, &iwork[1], liwork, info);
 	zgemm_("N", "N", n, n, n, &c_b2, &z__[z_offset], ldz, &work[1], n, &
 		c_b1, &work[indwk2], n);
 	zlacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz);
    }

 /*     If matrix was scaled, then rescale eigenvalues appropriately. */

    if (iscale == 1) {
 	if (*info == 0) {
 	    imax = *n;
 	} else {
 	    imax = *info - 1;
 	}
 	d__1 = 1. / sigma;
 	dscal_(&imax, &d__1, &w[1], &c__1);
    }

    work[1].r = (doublereal) lwmin, work[1].i = 0.;
    rwork[1] = (doublereal) lrwmin;
    iwork[1] = liwmin;
    return 0;

 /*     End of ZHBEVD */

 } /* zhbevd_ */

--- a/lapack-netlib/SRC/zhbevd_2stage.c
+++ b/lapack-netlib/SRC/zhbevd_2stage.c
@@ -0,0 +1,901 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {0.,0.};
 static doublecomplex c_b2 = {1.,0.};
 static integer c__2 = 2;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__4 = 4;
 static doublereal c_b23 = 1.;
 static integer c__1 = 1;

 /* > \brief <b> ZHBEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for
 OTHER matrices</b> */

 /*  @precisions fortran z -> s d c */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHBEVD_2STAGE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhbevd_
 2stage.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhbevd_
 2stage.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhbevd_
 2stage.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, */
 /*                                 WORK, LWORK, RWORK, LRWORK, IWORK, */
 /*                                 LIWORK, INFO ) */

 /*       IMPLICIT NONE */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N */
 /*       INTEGER            IWORK( * ) */
 /*       DOUBLE PRECISION   RWORK( * ), W( * ) */
 /*       COMPLEX*16         AB( LDAB, * ), WORK( * ), Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHBEVD_2STAGE computes all the eigenvalues and, optionally, eigenvectors of */
 /* > a complex Hermitian band matrix A using the 2stage technique for */
 /* > the reduction to tridiagonal.  If eigenvectors are desired, it */
 /* > uses a divide and conquer algorithm. */
 /* > */
 /* > The divide and conquer algorithm makes very mild assumptions about */
 /* > floating point arithmetic. It will work on machines with a guard */
 /* > digit in add/subtract, or on those binary machines without guard */
 /* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
 /* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */
 /* > without guard digits, but we know of none. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* >                  Not available in this release. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KD */
 /* > \verbatim */
 /* >          KD is INTEGER */
 /* >          The number of superdiagonals of the matrix A if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'.  KD >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AB */
 /* > \verbatim */
 /* >          AB is COMPLEX*16 array, dimension (LDAB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix A, stored in the first KD+1 rows of the array.  The */
 /* >          j-th column of A is stored in the j-th column of the array AB */
 /* >          as follows: */
 /* >          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */
 /* >          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=f2cmin(n,j+kd). */
 /* > */
 /* >          On exit, AB is overwritten by values generated during the */
 /* >          reduction to tridiagonal form.  If UPLO = 'U', the first */
 /* >          superdiagonal and the diagonal of the tridiagonal matrix T */
 /* >          are returned in rows KD and KD+1 of AB, and if UPLO = 'L', */
 /* >          the diagonal and first subdiagonal of T are returned in the */
 /* >          first two rows of AB. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KD + 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX*16 array, dimension (LDZ, N) */
 /* >          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal */
 /* >          eigenvectors of the matrix A, with the i-th column of Z */
 /* >          holding the eigenvector associated with W(i). */
 /* >          If JOBZ = 'N', then Z is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z.  LDZ >= 1, and if */
 /* >          JOBZ = 'V', LDZ >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK. LWORK >= 1, when N <= 1; */
 /* >          otherwise */
 /* >          If JOBZ = 'N' and N > 1, LWORK must be queried. */
 /* >                                   LWORK = MAX(1, dimension) where */
 /* >                                   dimension = (2KD+1)*N + KD*NTHREADS */
 /* >                                   where KD is the size of the band. */
 /* >                                   NTHREADS is the number of threads used when */
 /* >                                   openMP compilation is enabled, otherwise =1. */
 /* >          If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal sizes of the WORK, RWORK and */
 /* >          IWORK arrays, returns these values as the first entries of */
 /* >          the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, */
 /* >                                         dimension (LRWORK) */
 /* >          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LRWORK */
 /* > \verbatim */
 /* >          LRWORK is INTEGER */
 /* >          The dimension of array RWORK. */
 /* >          If N <= 1,               LRWORK must be at least 1. */
 /* >          If JOBZ = 'N' and N > 1, LRWORK must be at least N. */
 /* >          If JOBZ = 'V' and N > 1, LRWORK must be at least */
 /* >                        1 + 5*N + 2*N**2. */
 /* > */
 /* >          If LRWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */
 /* >          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LIWORK */
 /* > \verbatim */
 /* >          LIWORK is INTEGER */
 /* >          The dimension of array IWORK. */
 /* >          If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. */
 /* >          If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . */
 /* > */
 /* >          If LIWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          > 0:  if INFO = i, the algorithm failed to converge; i */
 /* >                off-diagonal elements of an intermediate tridiagonal */
 /* >                form did not converge to zero. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complex16OTHEReigen */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  All details about the 2stage techniques are available in: */
 /* > */
 /* >  Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */
 /* >  Parallel reduction to condensed forms for symmetric eigenvalue problems */
 /* >  using aggregated fine-grained and memory-aware kernels. In Proceedings */
 /* >  of 2011 International Conference for High Performance Computing, */
 /* >  Networking, Storage and Analysis (SC '11), New York, NY, USA, */
 /* >  Article 8 , 11 pages. */
 /* >  http://doi.acm.org/10.1145/2063384.2063394 */
 /* > */
 /* >  A. Haidar, J. Kurzak, P. Luszczek, 2013. */
 /* >  An improved parallel singular value algorithm and its implementation */
 /* >  for multicore hardware, In Proceedings of 2013 International Conference */
 /* >  for High Performance Computing, Networking, Storage and Analysis (SC '13). */
 /* >  Denver, Colorado, USA, 2013. */
 /* >  Article 90, 12 pages. */
 /* >  http://doi.acm.org/10.1145/2503210.2503292 */
 /* > */
 /* >  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */
 /* >  A novel hybrid CPU-GPU generalized eigensolver for electronic structure */
 /* >  calculations based on fine-grained memory aware tasks. */
 /* >  International Journal of High Performance Computing Applications. */
 /* >  Volume 28 Issue 2, Pages 196-209, May 2014. */
 /* >  http://hpc.sagepub.com/content/28/2/196 */
 /* > */
 /* > \endverbatim */

 /*  ===================================================================== */
 /* Subroutine */ int zhbevd_2stage_(char *jobz, char *uplo, integer *n, 
 	integer *kd, doublecomplex *ab, integer *ldab, doublereal *w, 
 	doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork,
 	 doublereal *rwork, integer *lrwork, integer *iwork, integer *liwork, 
 	integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, z_dim1, z_offset, i__1, i__2;
    doublereal d__1;

    /* Local variables */
    integer inde;
    extern integer ilaenv2stage_(integer *, char *, char *, integer *, 
 	    integer *, integer *, integer *);
    doublereal anrm;
    integer imax;
    extern /* Subroutine */ int zhetrd_hb2st_(char *, char *, char *, 
 	    integer *, integer *, doublecomplex *, integer *, doublereal *, 
 	    doublereal *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, integer *);
    doublereal rmin, rmax;
    integer llwk2;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
 	    integer *);
    doublereal sigma;
    extern logical lsame_(char *, char *);
    integer iinfo, indwk, lhtrd;
    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
 	    integer *);
    integer lwmin;
    logical lower;
    integer lwtrd, llrwk;
    logical wantz;
    integer indwk2, ib;
    extern doublereal dlamch_(char *);
    integer iscale;
    doublereal safmin;
    extern doublereal zlanhb_(char *, char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublereal *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    doublereal bignum;
    extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *,
 	     integer *), zlascl_(char *, integer *, integer *, doublereal *, 
 	    doublereal *, integer *, integer *, doublecomplex *, integer *, 
 	    integer *), zstedc_(char *, integer *, doublereal *, 
 	    doublereal *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, doublereal *, integer *, integer *, integer *, integer 
 	    *);
    integer indrwk, liwmin;
    extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *);
    integer lrwmin, llwork;
    doublereal smlnum;
    logical lquery;
    doublereal eps;
    integer indhous;



 /*  -- LAPACK driver routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    lower = lsame_(uplo, "L");
    lquery = *lwork == -1 || *liwork == -1 || *lrwork == -1;

    *info = 0;
    if (*n <= 1) {
 	lwmin = 1;
 	lrwmin = 1;
 	liwmin = 1;
    } else {
 	ib = ilaenv2stage_(&c__2, "ZHETRD_HB2ST", jobz, n, kd, &c_n1, &c_n1);
 	lhtrd = ilaenv2stage_(&c__3, "ZHETRD_HB2ST", jobz, n, kd, &ib, &c_n1);
 	lwtrd = ilaenv2stage_(&c__4, "ZHETRD_HB2ST", jobz, n, kd, &ib, &c_n1);
 	if (wantz) {
 /* Computing 2nd power */
 	    i__1 = *n;
 	    lwmin = i__1 * i__1 << 1;
 /* Computing 2nd power */
 	    i__1 = *n;
 	    lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1);
 	    liwmin = *n * 5 + 3;
 	} else {
 /* Computing MAX */
 	    i__1 = *n, i__2 = lhtrd + lwtrd;
 	    lwmin = f2cmax(i__1,i__2);
 	    lrwmin = *n;
 	    liwmin = 1;
 	}
    }
    if (! lsame_(jobz, "N")) {
 	*info = -1;
    } else if (! (lower || lsame_(uplo, "U"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*kd < 0) {
 	*info = -4;
    } else if (*ldab < *kd + 1) {
 	*info = -6;
    } else if (*ldz < 1 || wantz && *ldz < *n) {
 	*info = -9;
    }

    if (*info == 0) {
 	work[1].r = (doublereal) lwmin, work[1].i = 0.;
 	rwork[1] = (doublereal) lrwmin;
 	iwork[1] = liwmin;

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -11;
 	} else if (*lrwork < lrwmin && ! lquery) {
 	    *info = -13;
 	} else if (*liwork < liwmin && ! lquery) {
 	    *info = -15;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHBEVD_2STAGE", &i__1, (ftnlen)13);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

    if (*n == 1) {
 	i__1 = ab_dim1 + 1;
 	w[1] = ab[i__1].r;
 	if (wantz) {
 	    i__1 = z_dim1 + 1;
 	    z__[i__1].r = 1., z__[i__1].i = 0.;
 	}
 	return 0;
    }

 /*     Get machine constants. */

    safmin = dlamch_("Safe minimum");
    eps = dlamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1. / smlnum;
    rmin = sqrt(smlnum);
    rmax = sqrt(bignum);

 /*     Scale matrix to allowable range, if necessary. */

    anrm = zlanhb_("M", uplo, n, kd, &ab[ab_offset], ldab, &rwork[1]);
    iscale = 0;
    if (anrm > 0. && anrm < rmin) {
 	iscale = 1;
 	sigma = rmin / anrm;
    } else if (anrm > rmax) {
 	iscale = 1;
 	sigma = rmax / anrm;
    }
    if (iscale == 1) {
 	if (lower) {
 	    zlascl_("B", kd, kd, &c_b23, &sigma, n, n, &ab[ab_offset], ldab, 
 		    info);
 	} else {
 	    zlascl_("Q", kd, kd, &c_b23, &sigma, n, n, &ab[ab_offset], ldab, 
 		    info);
 	}
    }

 /*     Call ZHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. */

    inde = 1;
    indrwk = inde + *n;
    llrwk = *lrwork - indrwk + 1;
    indhous = 1;
    indwk = indhous + lhtrd;
    llwork = *lwork - indwk + 1;
    indwk2 = indwk + *n * *n;
    llwk2 = *lwork - indwk2 + 1;

    zhetrd_hb2st_("N", jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &
 	    rwork[inde], &work[indhous], &lhtrd, &work[indwk], &llwork, &
 	    iinfo);

 /*     For eigenvalues only, call DSTERF.  For eigenvectors, call ZSTEDC. */

    if (! wantz) {
 	dsterf_(n, &w[1], &rwork[inde], info);
    } else {
 	zstedc_("I", n, &w[1], &rwork[inde], &work[1], n, &work[indwk2], &
 		llwk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info);
 	zgemm_("N", "N", n, n, n, &c_b2, &z__[z_offset], ldz, &work[1], n, &
 		c_b1, &work[indwk2], n);
 	zlacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz);
    }

 /*     If matrix was scaled, then rescale eigenvalues appropriately. */

    if (iscale == 1) {
 	if (*info == 0) {
 	    imax = *n;
 	} else {
 	    imax = *info - 1;
 	}
 	d__1 = 1. / sigma;
 	dscal_(&imax, &d__1, &w[1], &c__1);
    }

    work[1].r = (doublereal) lwmin, work[1].i = 0.;
    rwork[1] = (doublereal) lrwmin;
    iwork[1] = liwmin;
    return 0;

 /*     End of ZHBEVD_2STAGE */

 } /* zhbevd_2stage__ */

--- a/lapack-netlib/SRC/zhbevx.c
+++ b/lapack-netlib/SRC/zhbevx.c
--- a/lapack-netlib/SRC/zhbevx_2stage.c
+++ b/lapack-netlib/SRC/zhbevx_2stage.c
--- a/lapack-netlib/SRC/zhbgst.c
+++ b/lapack-netlib/SRC/zhbgst.c
--- a/lapack-netlib/SRC/zhbgv.c
+++ b/lapack-netlib/SRC/zhbgv.c
@@ -0,0 +1,696 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b ZHBGV */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHBGV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhbgv.f
 "> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhbgv.f
 "> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhbgv.f
 "> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, */
 /*                         LDZ, WORK, RWORK, INFO ) */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, KA, KB, LDAB, LDBB, LDZ, N */
 /*       DOUBLE PRECISION   RWORK( * ), W( * ) */
 /*       COMPLEX*16         AB( LDAB, * ), BB( LDBB, * ), WORK( * ), */
 /*      $                   Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHBGV computes all the eigenvalues, and optionally, the eigenvectors */
 /* > of a complex generalized Hermitian-definite banded eigenproblem, of */
 /* > the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian */
 /* > and banded, and B is also positive definite. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangles of A and B are stored; */
 /* >          = 'L':  Lower triangles of A and B are stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KA */
 /* > \verbatim */
 /* >          KA is INTEGER */
 /* >          The number of superdiagonals of the matrix A if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'. KA >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KB */
 /* > \verbatim */
 /* >          KB is INTEGER */
 /* >          The number of superdiagonals of the matrix B if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'. KB >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AB */
 /* > \verbatim */
 /* >          AB is COMPLEX*16 array, dimension (LDAB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix A, stored in the first ka+1 rows of the array.  The */
 /* >          j-th column of A is stored in the j-th column of the array AB */
 /* >          as follows: */
 /* >          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for f2cmax(1,j-ka)<=i<=j; */
 /* >          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=f2cmin(n,j+ka). */
 /* > */
 /* >          On exit, the contents of AB are destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KA+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] BB */
 /* > \verbatim */
 /* >          BB is COMPLEX*16 array, dimension (LDBB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix B, stored in the first kb+1 rows of the array.  The */
 /* >          j-th column of B is stored in the j-th column of the array BB */
 /* >          as follows: */
 /* >          if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for f2cmax(1,j-kb)<=i<=j; */
 /* >          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=f2cmin(n,j+kb). */
 /* > */
 /* >          On exit, the factor S from the split Cholesky factorization */
 /* >          B = S**H*S, as returned by ZPBSTF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDBB */
 /* > \verbatim */
 /* >          LDBB is INTEGER */
 /* >          The leading dimension of the array BB.  LDBB >= KB+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX*16 array, dimension (LDZ, N) */
 /* >          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */
 /* >          eigenvectors, with the i-th column of Z holding the */
 /* >          eigenvector associated with W(i). The eigenvectors are */
 /* >          normalized so that Z**H*B*Z = I. */
 /* >          If JOBZ = 'N', then Z is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z.  LDZ >= 1, and if */
 /* >          JOBZ = 'V', LDZ >= N. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, dimension (3*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, and i is: */
 /* >             <= N:  the algorithm failed to converge: */
 /* >                    i off-diagonal elements of an intermediate */
 /* >                    tridiagonal form did not converge to zero; */
 /* >             > N:   if INFO = N + i, for 1 <= i <= N, then ZPBSTF */
 /* >                    returned INFO = i: B is not positive definite. */
 /* >                    The factorization of B could not be completed and */
 /* >                    no eigenvalues or eigenvectors were computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16OTHEReigen */

 /*  ===================================================================== */
 /* Subroutine */ int zhbgv_(char *jobz, char *uplo, integer *n, integer *ka, 
 	integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, 
 	integer *ldbb, doublereal *w, doublecomplex *z__, integer *ldz, 
 	doublecomplex *work, doublereal *rwork, integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, bb_dim1, bb_offset, z_dim1, z_offset, i__1;

    /* Local variables */
    integer inde;
    char vect[1];
    extern logical lsame_(char *, char *);
    integer iinfo;
    logical upper, wantz;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dsterf_(
 	    integer *, doublereal *, doublereal *, integer *), zhbtrd_(char *,
 	     char *, integer *, integer *, doublecomplex *, integer *, 
 	    doublereal *, doublereal *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *);
    integer indwrk;
    extern /* Subroutine */ int zhbgst_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, integer *,
 	     doublecomplex *, integer *, doublecomplex *, doublereal *, 
 	    integer *), zpbstf_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, integer *), zsteqr_(char *, 
 	    integer *, doublereal *, doublereal *, doublecomplex *, integer *,
 	     doublereal *, integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    bb_dim1 = *ldbb;
    bb_offset = 1 + bb_dim1 * 1;
    bb -= bb_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");

    *info = 0;
    if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -1;
    } else if (! (upper || lsame_(uplo, "L"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*ka < 0) {
 	*info = -4;
    } else if (*kb < 0 || *kb > *ka) {
 	*info = -5;
    } else if (*ldab < *ka + 1) {
 	*info = -7;
    } else if (*ldbb < *kb + 1) {
 	*info = -9;
    } else if (*ldz < 1 || wantz && *ldz < *n) {
 	*info = -12;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHBGV ", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Form a split Cholesky factorization of B. */

    zpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info);
    if (*info != 0) {
 	*info = *n + *info;
 	return 0;
    }

 /*     Transform problem to standard eigenvalue problem. */

    inde = 1;
    indwrk = inde + *n;
    zhbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb,
 	     &z__[z_offset], ldz, &work[1], &rwork[indwrk], &iinfo);

 /*     Reduce to tridiagonal form. */

    if (wantz) {
 	*(unsigned char *)vect = 'U';
    } else {
 	*(unsigned char *)vect = 'N';
    }
    zhbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &w[1], &rwork[inde], &
 	    z__[z_offset], ldz, &work[1], &iinfo);

 /*     For eigenvalues only, call DSTERF.  For eigenvectors, call ZSTEQR. */

    if (! wantz) {
 	dsterf_(n, &w[1], &rwork[inde], info);
    } else {
 	zsteqr_(jobz, n, &w[1], &rwork[inde], &z__[z_offset], ldz, &rwork[
 		indwrk], info);
    }
    return 0;

 /*     End of ZHBGV */

 } /* zhbgv_ */

--- a/lapack-netlib/SRC/zhbgvd.c
+++ b/lapack-netlib/SRC/zhbgvd.c
@@ -0,0 +1,829 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};
 static doublecomplex c_b2 = {0.,0.};

 /* > \brief \b ZHBGVD */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHBGVD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhbgvd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhbgvd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhbgvd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, */
 /*                          Z, LDZ, WORK, LWORK, RWORK, LRWORK, IWORK, */
 /*                          LIWORK, INFO ) */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LRWORK, */
 /*      $                   LWORK, N */
 /*       INTEGER            IWORK( * ) */
 /*       DOUBLE PRECISION   RWORK( * ), W( * ) */
 /*       COMPLEX*16         AB( LDAB, * ), BB( LDBB, * ), WORK( * ), */
 /*      $                   Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHBGVD computes all the eigenvalues, and optionally, the eigenvectors */
 /* > of a complex generalized Hermitian-definite banded eigenproblem, of */
 /* > the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian */
 /* > and banded, and B is also positive definite.  If eigenvectors are */
 /* > desired, it uses a divide and conquer algorithm. */
 /* > */
 /* > The divide and conquer algorithm makes very mild assumptions about */
 /* > floating point arithmetic. It will work on machines with a guard */
 /* > digit in add/subtract, or on those binary machines without guard */
 /* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
 /* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */
 /* > without guard digits, but we know of none. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangles of A and B are stored; */
 /* >          = 'L':  Lower triangles of A and B are stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KA */
 /* > \verbatim */
 /* >          KA is INTEGER */
 /* >          The number of superdiagonals of the matrix A if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'. KA >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KB */
 /* > \verbatim */
 /* >          KB is INTEGER */
 /* >          The number of superdiagonals of the matrix B if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'. KB >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AB */
 /* > \verbatim */
 /* >          AB is COMPLEX*16 array, dimension (LDAB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix A, stored in the first ka+1 rows of the array.  The */
 /* >          j-th column of A is stored in the j-th column of the array AB */
 /* >          as follows: */
 /* >          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for f2cmax(1,j-ka)<=i<=j; */
 /* >          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=f2cmin(n,j+ka). */
 /* > */
 /* >          On exit, the contents of AB are destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KA+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] BB */
 /* > \verbatim */
 /* >          BB is COMPLEX*16 array, dimension (LDBB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix B, stored in the first kb+1 rows of the array.  The */
 /* >          j-th column of B is stored in the j-th column of the array BB */
 /* >          as follows: */
 /* >          if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for f2cmax(1,j-kb)<=i<=j; */
 /* >          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=f2cmin(n,j+kb). */
 /* > */
 /* >          On exit, the factor S from the split Cholesky factorization */
 /* >          B = S**H*S, as returned by ZPBSTF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDBB */
 /* > \verbatim */
 /* >          LDBB is INTEGER */
 /* >          The leading dimension of the array BB.  LDBB >= KB+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX*16 array, dimension (LDZ, N) */
 /* >          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */
 /* >          eigenvectors, with the i-th column of Z holding the */
 /* >          eigenvector associated with W(i). The eigenvectors are */
 /* >          normalized so that Z**H*B*Z = I. */
 /* >          If JOBZ = 'N', then Z is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z.  LDZ >= 1, and if */
 /* >          JOBZ = 'V', LDZ >= N. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO=0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If N <= 1,               LWORK >= 1. */
 /* >          If JOBZ = 'N' and N > 1, LWORK >= N. */
 /* >          If JOBZ = 'V' and N > 1, LWORK >= 2*N**2. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal sizes of the WORK, RWORK and */
 /* >          IWORK arrays, returns these values as the first entries of */
 /* >          the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) */
 /* >          On exit, if INFO=0, RWORK(1) returns the optimal LRWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LRWORK */
 /* > \verbatim */
 /* >          LRWORK is INTEGER */
 /* >          The dimension of array RWORK. */
 /* >          If N <= 1,               LRWORK >= 1. */
 /* >          If JOBZ = 'N' and N > 1, LRWORK >= N. */
 /* >          If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. */
 /* > */
 /* >          If LRWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */
 /* >          On exit, if INFO=0, IWORK(1) returns the optimal LIWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LIWORK */
 /* > \verbatim */
 /* >          LIWORK is INTEGER */
 /* >          The dimension of array IWORK. */
 /* >          If JOBZ = 'N' or N <= 1, LIWORK >= 1. */
 /* >          If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. */
 /* > */
 /* >          If LIWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, and i is: */
 /* >             <= N:  the algorithm failed to converge: */
 /* >                    i off-diagonal elements of an intermediate */
 /* >                    tridiagonal form did not converge to zero; */
 /* >             > N:   if INFO = N + i, for 1 <= i <= N, then ZPBSTF */
 /* >                    returned INFO = i: B is not positive definite. */
 /* >                    The factorization of B could not be completed and */
 /* >                    no eigenvalues or eigenvectors were computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2016 */

 /* > \ingroup complex16OTHEReigen */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */

 /*  ===================================================================== */
 /* Subroutine */ int zhbgvd_(char *jobz, char *uplo, integer *n, integer *ka, 
 	integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, 
 	integer *ldbb, doublereal *w, doublecomplex *z__, integer *ldz, 
 	doublecomplex *work, integer *lwork, doublereal *rwork, integer *
 	lrwork, integer *iwork, integer *liwork, integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, bb_dim1, bb_offset, z_dim1, z_offset, i__1;

    /* Local variables */
    integer inde;
    char vect[1];
    integer llwk2;
    extern logical lsame_(char *, char *);
    integer iinfo;
    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
 	    integer *);
    integer lwmin;
    logical upper;
    integer llrwk;
    logical wantz;
    integer indwk2;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dsterf_(
 	    integer *, doublereal *, doublereal *, integer *), zstedc_(char *,
 	     integer *, doublereal *, doublereal *, doublecomplex *, integer *
 	    , doublecomplex *, integer *, doublereal *, integer *, integer *, 
 	    integer *, integer *), zhbtrd_(char *, char *, integer *, 
 	    integer *, doublecomplex *, integer *, doublereal *, doublereal *,
 	     doublecomplex *, integer *, doublecomplex *, integer *);
    integer indwrk, liwmin;
    extern /* Subroutine */ int zhbgst_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, integer *,
 	     doublecomplex *, integer *, doublecomplex *, doublereal *, 
 	    integer *), zlacpy_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *);
    integer lrwmin;
    extern /* Subroutine */ int zpbstf_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, integer *);
    logical lquery;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    bb_dim1 = *ldbb;
    bb_offset = 1 + bb_dim1 * 1;
    bb -= bb_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1;

    *info = 0;
    if (*n <= 1) {
 	lwmin = *n + 1;
 	lrwmin = *n + 1;
 	liwmin = 1;
    } else if (wantz) {
 /* Computing 2nd power */
 	i__1 = *n;
 	lwmin = i__1 * i__1 << 1;
 /* Computing 2nd power */
 	i__1 = *n;
 	lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1);
 	liwmin = *n * 5 + 3;
    } else {
 	lwmin = *n;
 	lrwmin = *n;
 	liwmin = 1;
    }
    if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -1;
    } else if (! (upper || lsame_(uplo, "L"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*ka < 0) {
 	*info = -4;
    } else if (*kb < 0 || *kb > *ka) {
 	*info = -5;
    } else if (*ldab < *ka + 1) {
 	*info = -7;
    } else if (*ldbb < *kb + 1) {
 	*info = -9;
    } else if (*ldz < 1 || wantz && *ldz < *n) {
 	*info = -12;
    }

    if (*info == 0) {
 	work[1].r = (doublereal) lwmin, work[1].i = 0.;
 	rwork[1] = (doublereal) lrwmin;
 	iwork[1] = liwmin;

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -14;
 	} else if (*lrwork < lrwmin && ! lquery) {
 	    *info = -16;
 	} else if (*liwork < liwmin && ! lquery) {
 	    *info = -18;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHBGVD", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Form a split Cholesky factorization of B. */

    zpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info);
    if (*info != 0) {
 	*info = *n + *info;
 	return 0;
    }

 /*     Transform problem to standard eigenvalue problem. */

    inde = 1;
    indwrk = inde + *n;
    indwk2 = *n * *n + 1;
    llwk2 = *lwork - indwk2 + 2;
    llrwk = *lrwork - indwrk + 2;
    zhbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb,
 	     &z__[z_offset], ldz, &work[1], &rwork[1], &iinfo);

 /*     Reduce Hermitian band matrix to tridiagonal form. */

    if (wantz) {
 	*(unsigned char *)vect = 'U';
    } else {
 	*(unsigned char *)vect = 'N';
    }
    zhbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &w[1], &rwork[inde], &
 	    z__[z_offset], ldz, &work[1], &iinfo);

 /*     For eigenvalues only, call DSTERF.  For eigenvectors, call ZSTEDC. */

    if (! wantz) {
 	dsterf_(n, &w[1], &rwork[inde], info);
    } else {
 	zstedc_("I", n, &w[1], &rwork[inde], &work[1], n, &work[indwk2], &
 		llwk2, &rwork[indwrk], &llrwk, &iwork[1], liwork, info);
 	zgemm_("N", "N", n, n, n, &c_b1, &z__[z_offset], ldz, &work[1], n, &
 		c_b2, &work[indwk2], n);
 	zlacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz);
    }

    work[1].r = (doublereal) lwmin, work[1].i = 0.;
    rwork[1] = (doublereal) lrwmin;
    iwork[1] = liwmin;
    return 0;

 /*     End of ZHBGVD */

 } /* zhbgvd_ */

--- a/lapack-netlib/SRC/zhbgvx.c
+++ b/lapack-netlib/SRC/zhbgvx.c
@@ -0,0 +1,976 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {0.,0.};
 static doublecomplex c_b2 = {1.,0.};
 static integer c__1 = 1;

 /* > \brief \b ZHBGVX */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHBGVX + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhbgvx.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhbgvx.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhbgvx.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB, */
 /*                          LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, */
 /*                          LDZ, WORK, RWORK, IWORK, IFAIL, INFO ) */

 /*       CHARACTER          JOBZ, RANGE, UPLO */
 /*       INTEGER            IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M, */
 /*      $                   N */
 /*       DOUBLE PRECISION   ABSTOL, VL, VU */
 /*       INTEGER            IFAIL( * ), IWORK( * ) */
 /*       DOUBLE PRECISION   RWORK( * ), W( * ) */
 /*       COMPLEX*16         AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ), */
 /*      $                   WORK( * ), Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHBGVX computes all the eigenvalues, and optionally, the eigenvectors */
 /* > of a complex generalized Hermitian-definite banded eigenproblem, of */
 /* > the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian */
 /* > and banded, and B is also positive definite.  Eigenvalues and */
 /* > eigenvectors can be selected by specifying either all eigenvalues, */
 /* > a range of values or a range of indices for the desired eigenvalues. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] RANGE */
 /* > \verbatim */
 /* >          RANGE is CHARACTER*1 */
 /* >          = 'A': all eigenvalues will be found; */
 /* >          = 'V': all eigenvalues in the half-open interval (VL,VU] */
 /* >                 will be found; */
 /* >          = 'I': the IL-th through IU-th eigenvalues will be found. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangles of A and B are stored; */
 /* >          = 'L':  Lower triangles of A and B are stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KA */
 /* > \verbatim */
 /* >          KA is INTEGER */
 /* >          The number of superdiagonals of the matrix A if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'. KA >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KB */
 /* > \verbatim */
 /* >          KB is INTEGER */
 /* >          The number of superdiagonals of the matrix B if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'. KB >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AB */
 /* > \verbatim */
 /* >          AB is COMPLEX*16 array, dimension (LDAB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix A, stored in the first ka+1 rows of the array.  The */
 /* >          j-th column of A is stored in the j-th column of the array AB */
 /* >          as follows: */
 /* >          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for f2cmax(1,j-ka)<=i<=j; */
 /* >          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=f2cmin(n,j+ka). */
 /* > */
 /* >          On exit, the contents of AB are destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KA+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] BB */
 /* > \verbatim */
 /* >          BB is COMPLEX*16 array, dimension (LDBB, N) */
 /* >          On entry, the upper or lower triangle of the Hermitian band */
 /* >          matrix B, stored in the first kb+1 rows of the array.  The */
 /* >          j-th column of B is stored in the j-th column of the array BB */
 /* >          as follows: */
 /* >          if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for f2cmax(1,j-kb)<=i<=j; */
 /* >          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=f2cmin(n,j+kb). */
 /* > */
 /* >          On exit, the factor S from the split Cholesky factorization */
 /* >          B = S**H*S, as returned by ZPBSTF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDBB */
 /* > \verbatim */
 /* >          LDBB is INTEGER */
 /* >          The leading dimension of the array BB.  LDBB >= KB+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Q */
 /* > \verbatim */
 /* >          Q is COMPLEX*16 array, dimension (LDQ, N) */
 /* >          If JOBZ = 'V', the n-by-n matrix used in the reduction of */
 /* >          A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x, */
 /* >          and consequently C to tridiagonal form. */
 /* >          If JOBZ = 'N', the array Q is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDQ */
 /* > \verbatim */
 /* >          LDQ is INTEGER */
 /* >          The leading dimension of the array Q.  If JOBZ = 'N', */
 /* >          LDQ >= 1. If JOBZ = 'V', LDQ >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] VL */
 /* > \verbatim */
 /* >          VL is DOUBLE PRECISION */
 /* > */
 /* >          If RANGE='V', the lower bound of the interval to */
 /* >          be searched for eigenvalues. VL < VU. */
 /* >          Not referenced if RANGE = 'A' or 'I'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] VU */
 /* > \verbatim */
 /* >          VU is DOUBLE PRECISION */
 /* > */
 /* >          If RANGE='V', the upper bound of the interval to */
 /* >          be searched for eigenvalues. VL < VU. */
 /* >          Not referenced if RANGE = 'A' or 'I'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IL */
 /* > \verbatim */
 /* >          IL is INTEGER */
 /* > */
 /* >          If RANGE='I', the index of the */
 /* >          smallest eigenvalue to be returned. */
 /* >          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */
 /* >          Not referenced if RANGE = 'A' or 'V'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IU */
 /* > \verbatim */
 /* >          IU is INTEGER */
 /* > */
 /* >          If RANGE='I', the index of the */
 /* >          largest eigenvalue to be returned. */
 /* >          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */
 /* >          Not referenced if RANGE = 'A' or 'V'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ABSTOL */
 /* > \verbatim */
 /* >          ABSTOL is DOUBLE PRECISION */
 /* >          The absolute error tolerance for the eigenvalues. */
 /* >          An approximate eigenvalue is accepted as converged */
 /* >          when it is determined to lie in an interval [a,b] */
 /* >          of width less than or equal to */
 /* > */
 /* >                  ABSTOL + EPS *   f2cmax( |a|,|b| ) , */
 /* > */
 /* >          where EPS is the machine precision.  If ABSTOL is less than */
 /* >          or equal to zero, then  EPS*|T|  will be used in its place, */
 /* >          where |T| is the 1-norm of the tridiagonal matrix obtained */
 /* >          by reducing AP to tridiagonal form. */
 /* > */
 /* >          Eigenvalues will be computed most accurately when ABSTOL is */
 /* >          set to twice the underflow threshold 2*DLAMCH('S'), not zero. */
 /* >          If this routine returns with INFO>0, indicating that some */
 /* >          eigenvectors did not converge, try setting ABSTOL to */
 /* >          2*DLAMCH('S'). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The total number of eigenvalues found.  0 <= M <= N. */
 /* >          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX*16 array, dimension (LDZ, N) */
 /* >          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */
 /* >          eigenvectors, with the i-th column of Z holding the */
 /* >          eigenvector associated with W(i). The eigenvectors are */
 /* >          normalized so that Z**H*B*Z = I. */
 /* >          If JOBZ = 'N', then Z is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z.  LDZ >= 1, and if */
 /* >          JOBZ = 'V', LDZ >= N. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, dimension (7*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (5*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IFAIL */
 /* > \verbatim */
 /* >          IFAIL is INTEGER array, dimension (N) */
 /* >          If JOBZ = 'V', then if INFO = 0, the first M elements of */
 /* >          IFAIL are zero.  If INFO > 0, then IFAIL contains the */
 /* >          indices of the eigenvectors that failed to converge. */
 /* >          If JOBZ = 'N', then IFAIL is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, and i is: */
 /* >             <= N:  then i eigenvectors failed to converge.  Their */
 /* >                    indices are stored in array IFAIL. */
 /* >             > N:   if INFO = N + i, for 1 <= i <= N, then ZPBSTF */
 /* >                    returned INFO = i: B is not positive definite. */
 /* >                    The factorization of B could not be completed and */
 /* >                    no eigenvalues or eigenvectors were computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2016 */

 /* > \ingroup complex16OTHEReigen */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */

 /*  ===================================================================== */
 /* Subroutine */ int zhbgvx_(char *jobz, char *range, char *uplo, integer *n, 
 	integer *ka, integer *kb, doublecomplex *ab, integer *ldab, 
 	doublecomplex *bb, integer *ldbb, doublecomplex *q, integer *ldq, 
 	doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal *
 	abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, 
 	doublecomplex *work, doublereal *rwork, integer *iwork, integer *
 	ifail, integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, bb_dim1, bb_offset, q_dim1, q_offset, z_dim1, 
 	    z_offset, i__1, i__2;

    /* Local variables */
    integer indd, inde;
    char vect[1];
    logical test;
    integer itmp1, i__, j, indee;
    extern logical lsame_(char *, char *);
    integer iinfo;
    char order[1];
    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
 	    doublereal *, integer *), zgemv_(char *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *);
    logical upper, wantz;
    extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *), zswap_(integer *, doublecomplex *, 
 	    integer *, doublecomplex *, integer *);
    integer jj;
    logical alleig, indeig;
    integer indibl;
    logical valeig;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    integer indiwk, indisp;
    extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *,
 	     integer *), dstebz_(char *, char *, integer *, doublereal *, 
 	    doublereal *, integer *, integer *, doublereal *, doublereal *, 
 	    doublereal *, integer *, integer *, doublereal *, integer *, 
 	    integer *, doublereal *, integer *, integer *), 
 	    zhbtrd_(char *, char *, integer *, integer *, doublecomplex *, 
 	    integer *, doublereal *, doublereal *, doublecomplex *, integer *,
 	     doublecomplex *, integer *);
    integer indrwk, indwrk;
    extern /* Subroutine */ int zhbgst_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, integer *,
 	     doublecomplex *, integer *, doublecomplex *, doublereal *, 
 	    integer *), zlacpy_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *);
    integer nsplit;
    extern /* Subroutine */ int zpbstf_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, integer *), zstein_(integer *,
 	     doublereal *, doublereal *, integer *, doublereal *, integer *, 
 	    integer *, doublecomplex *, integer *, doublereal *, integer *, 
 	    integer *, integer *), zsteqr_(char *, integer *, doublereal *, 
 	    doublereal *, doublecomplex *, integer *, doublereal *, integer *);
    doublereal tmp1;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    bb_dim1 = *ldbb;
    bb_offset = 1 + bb_dim1 * 1;
    bb -= bb_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;
    --iwork;
    --ifail;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    alleig = lsame_(range, "A");
    valeig = lsame_(range, "V");
    indeig = lsame_(range, "I");

    *info = 0;
    if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -1;
    } else if (! (alleig || valeig || indeig)) {
 	*info = -2;
    } else if (! (upper || lsame_(uplo, "L"))) {
 	*info = -3;
    } else if (*n < 0) {
 	*info = -4;
    } else if (*ka < 0) {
 	*info = -5;
    } else if (*kb < 0 || *kb > *ka) {
 	*info = -6;
    } else if (*ldab < *ka + 1) {
 	*info = -8;
    } else if (*ldbb < *kb + 1) {
 	*info = -10;
    } else if (*ldq < 1 || wantz && *ldq < *n) {
 	*info = -12;
    } else {
 	if (valeig) {
 	    if (*n > 0 && *vu <= *vl) {
 		*info = -14;
 	    }
 	} else if (indeig) {
 	    if (*il < 1 || *il > f2cmax(1,*n)) {
 		*info = -15;
 	    } else if (*iu < f2cmin(*n,*il) || *iu > *n) {
 		*info = -16;
 	    }
 	}
    }
    if (*info == 0) {
 	if (*ldz < 1 || wantz && *ldz < *n) {
 	    *info = -21;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHBGVX", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    *m = 0;
    if (*n == 0) {
 	return 0;
    }

 /*     Form a split Cholesky factorization of B. */

    zpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info);
    if (*info != 0) {
 	*info = *n + *info;
 	return 0;
    }

 /*     Transform problem to standard eigenvalue problem. */

    zhbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb,
 	     &q[q_offset], ldq, &work[1], &rwork[1], &iinfo);

 /*     Solve the standard eigenvalue problem. */
 /*     Reduce Hermitian band matrix to tridiagonal form. */

    indd = 1;
    inde = indd + *n;
    indrwk = inde + *n;
    indwrk = 1;
    if (wantz) {
 	*(unsigned char *)vect = 'U';
    } else {
 	*(unsigned char *)vect = 'N';
    }
    zhbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &rwork[indd], &rwork[
 	    inde], &q[q_offset], ldq, &work[indwrk], &iinfo);

 /*     If all eigenvalues are desired and ABSTOL is less than or equal */
 /*     to zero, then call DSTERF or ZSTEQR.  If this fails for some */
 /*     eigenvalue, then try DSTEBZ. */

    test = FALSE_;
    if (indeig) {
 	if (*il == 1 && *iu == *n) {
 	    test = TRUE_;
 	}
    }
    if ((alleig || test) && *abstol <= 0.) {
 	dcopy_(n, &rwork[indd], &c__1, &w[1], &c__1);
 	indee = indrwk + (*n << 1);
 	i__1 = *n - 1;
 	dcopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1);
 	if (! wantz) {
 	    dsterf_(n, &w[1], &rwork[indee], info);
 	} else {
 	    zlacpy_("A", n, n, &q[q_offset], ldq, &z__[z_offset], ldz);
 	    zsteqr_(jobz, n, &w[1], &rwork[indee], &z__[z_offset], ldz, &
 		    rwork[indrwk], info);
 	    if (*info == 0) {
 		i__1 = *n;
 		for (i__ = 1; i__ <= i__1; ++i__) {
 		    ifail[i__] = 0;
 /* L10: */
 		}
 	    }
 	}
 	if (*info == 0) {
 	    *m = *n;
 	    goto L30;
 	}
 	*info = 0;
    }

 /*     Otherwise, call DSTEBZ and, if eigenvectors are desired, */
 /*     call ZSTEIN. */

    if (wantz) {
 	*(unsigned char *)order = 'B';
    } else {
 	*(unsigned char *)order = 'E';
    }
    indibl = 1;
    indisp = indibl + *n;
    indiwk = indisp + *n;
    dstebz_(range, order, n, vl, vu, il, iu, abstol, &rwork[indd], &rwork[
 	    inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[
 	    indrwk], &iwork[indiwk], info);

    if (wantz) {
 	zstein_(n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &
 		iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[
 		indiwk], &ifail[1], info);

 /*        Apply unitary matrix used in reduction to tridiagonal */
 /*        form to eigenvectors returned by ZSTEIN. */

 	i__1 = *m;
 	for (j = 1; j <= i__1; ++j) {
 	    zcopy_(n, &z__[j * z_dim1 + 1], &c__1, &work[1], &c__1);
 	    zgemv_("N", n, n, &c_b2, &q[q_offset], ldq, &work[1], &c__1, &
 		    c_b1, &z__[j * z_dim1 + 1], &c__1);
 /* L20: */
 	}
    }

 L30:

 /*     If eigenvalues are not in order, then sort them, along with */
 /*     eigenvectors. */

    if (wantz) {
 	i__1 = *m - 1;
 	for (j = 1; j <= i__1; ++j) {
 	    i__ = 0;
 	    tmp1 = w[j];
 	    i__2 = *m;
 	    for (jj = j + 1; jj <= i__2; ++jj) {
 		if (w[jj] < tmp1) {
 		    i__ = jj;
 		    tmp1 = w[jj];
 		}
 /* L40: */
 	    }

 	    if (i__ != 0) {
 		itmp1 = iwork[indibl + i__ - 1];
 		w[i__] = w[j];
 		iwork[indibl + i__ - 1] = iwork[indibl + j - 1];
 		w[j] = tmp1;
 		iwork[indibl + j - 1] = itmp1;
 		zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1],
 			 &c__1);
 		if (*info != 0) {
 		    itmp1 = ifail[i__];
 		    ifail[i__] = ifail[j];
 		    ifail[j] = itmp1;
 		}
 	    }
 /* L50: */
 	}
    }

    return 0;

 /*     End of ZHBGVX */

 } /* zhbgvx_ */

--- a/lapack-netlib/SRC/zhbtrd.c
+++ b/lapack-netlib/SRC/zhbtrd.c
--- a/lapack-netlib/SRC/zhecon.c
+++ b/lapack-netlib/SRC/zhecon.c
@@ -0,0 +1,635 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b ZHECON */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHECON + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhecon.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhecon.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhecon.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHECON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, */
 /*                          INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, N */
 /*       DOUBLE PRECISION   ANORM, RCOND */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX*16         A( LDA, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHECON estimates the reciprocal of the condition number of a complex */
 /* > Hermitian matrix A using the factorization A = U*D*U**H or */
 /* > A = L*D*L**H computed by ZHETRF. */
 /* > */
 /* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */
 /* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          Specifies whether the details of the factorization are stored */
 /* >          as an upper or lower triangular matrix. */
 /* >          = 'U':  Upper triangular, form is A = U*D*U**H; */
 /* >          = 'L':  Lower triangular, form is A = L*D*L**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          The block diagonal matrix D and the multipliers used to */
 /* >          obtain the factor U or L as computed by ZHETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          Details of the interchanges and the block structure of D */
 /* >          as determined by ZHETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ANORM */
 /* > \verbatim */
 /* >          ANORM is DOUBLE PRECISION */
 /* >          The 1-norm of the original matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is DOUBLE PRECISION */
 /* >          The reciprocal of the condition number of the matrix A, */
 /* >          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */
 /* >          estimate of the 1-norm of inv(A) computed in this routine. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16HEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int zhecon_(char *uplo, integer *n, doublecomplex *a, 
 	integer *lda, integer *ipiv, doublereal *anorm, doublereal *rcond, 
 	doublecomplex *work, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;

    /* Local variables */
    integer kase, i__;
    extern logical lsame_(char *, char *);
    integer isave[3];
    logical upper;
    extern /* Subroutine */ int zlacn2_(integer *, doublecomplex *, 
 	    doublecomplex *, doublereal *, integer *, integer *), xerbla_(
 	    char *, integer *, ftnlen);
    doublereal ainvnm;
    extern /* Subroutine */ int zhetrs_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, integer *, doublecomplex *, integer *,
 	     integer *);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    --work;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    } else if (*anorm < 0.) {
 	*info = -6;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHECON", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    *rcond = 0.;
    if (*n == 0) {
 	*rcond = 1.;
 	return 0;
    } else if (*anorm <= 0.) {
 	return 0;
    }

 /*     Check that the diagonal matrix D is nonsingular. */

    if (upper) {

 /*        Upper triangular storage: examine D from bottom to top */

 	for (i__ = *n; i__ >= 1; --i__) {
 	    i__1 = i__ + i__ * a_dim1;
 	    if (ipiv[i__] > 0 && (a[i__1].r == 0. && a[i__1].i == 0.)) {
 		return 0;
 	    }
 /* L10: */
 	}
    } else {

 /*        Lower triangular storage: examine D from top to bottom. */

 	i__1 = *n;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = i__ + i__ * a_dim1;
 	    if (ipiv[i__] > 0 && (a[i__2].r == 0. && a[i__2].i == 0.)) {
 		return 0;
 	    }
 /* L20: */
 	}
    }

 /*     Estimate the 1-norm of the inverse. */

    kase = 0;
 L30:
    zlacn2_(n, &work[*n + 1], &work[1], &ainvnm, &kase, isave);
    if (kase != 0) {

 /*        Multiply by inv(L*D*L**H) or inv(U*D*U**H). */

 	zhetrs_(uplo, n, &c__1, &a[a_offset], lda, &ipiv[1], &work[1], n, 
 		info);
 	goto L30;
    }

 /*     Compute the estimate of the reciprocal condition number. */

    if (ainvnm != 0.) {
 	*rcond = 1. / ainvnm / *anorm;
    }

    return 0;

 /*     End of ZHECON */

 } /* zhecon_ */

--- a/lapack-netlib/SRC/zhecon_3.c
+++ b/lapack-netlib/SRC/zhecon_3.c
@@ -0,0 +1,675 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b ZHECON_3 */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHECON_3 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhecon_
 3.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhecon_
 3.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhecon_
 3.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHECON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, */
 /*                            WORK, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, N */
 /*       DOUBLE PRECISION   ANORM, RCOND */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX*16         A( LDA, * ), E ( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > ZHECON_3 estimates the reciprocal of the condition number (in the */
 /* > 1-norm) of a complex Hermitian matrix A using the factorization */
 /* > computed by ZHETRF_RK or ZHETRF_BK: */
 /* > */
 /* >    A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), */
 /* > */
 /* > where U (or L) is unit upper (or lower) triangular matrix, */
 /* > U**H (or L**H) is the conjugate of U (or L), P is a permutation */
 /* > matrix, P**T is the transpose of P, and D is Hermitian and block */
 /* > diagonal with 1-by-1 and 2-by-2 diagonal blocks. */
 /* > */
 /* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */
 /* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */
 /* > This routine uses BLAS3 solver ZHETRS_3. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          Specifies whether the details of the factorization are */
 /* >          stored as an upper or lower triangular matrix: */
 /* >          = 'U':  Upper triangular, form is A = P*U*D*(U**H)*(P**T); */
 /* >          = 'L':  Lower triangular, form is A = P*L*D*(L**H)*(P**T). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          Diagonal of the block diagonal matrix D and factors U or L */
 /* >          as computed by ZHETRF_RK and ZHETRF_BK: */
 /* >            a) ONLY diagonal elements of the Hermitian block diagonal */
 /* >               matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); */
 /* >               (superdiagonal (or subdiagonal) elements of D */
 /* >                should be provided on entry in array E), and */
 /* >            b) If UPLO = 'U': factor U in the superdiagonal part of A. */
 /* >               If UPLO = 'L': factor L in the subdiagonal part of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] E */
 /* > \verbatim */
 /* >          E is COMPLEX*16 array, dimension (N) */
 /* >          On entry, contains the superdiagonal (or subdiagonal) */
 /* >          elements of the Hermitian block diagonal matrix D */
 /* >          with 1-by-1 or 2-by-2 diagonal blocks, where */
 /* >          If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; */
 /* >          If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. */
 /* > */
 /* >          NOTE: For 1-by-1 diagonal block D(k), where */
 /* >          1 <= k <= N, the element E(k) is not referenced in both */
 /* >          UPLO = 'U' or UPLO = 'L' cases. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          Details of the interchanges and the block structure of D */
 /* >          as determined by ZHETRF_RK or ZHETRF_BK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ANORM */
 /* > \verbatim */
 /* >          ANORM is DOUBLE PRECISION */
 /* >          The 1-norm of the original matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is DOUBLE PRECISION */
 /* >          The reciprocal of the condition number of the matrix A, */
 /* >          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */
 /* >          estimate of the 1-norm of inv(A) computed in this routine. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2017 */

 /* > \ingroup complex16HEcomputational */

 /* > \par Contributors: */
 /*  ================== */
 /* > \verbatim */
 /* > */
 /* >  June 2017,  Igor Kozachenko, */
 /* >                  Computer Science Division, */
 /* >                  University of California, Berkeley */
 /* > */
 /* >  September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */
 /* >                  School of Mathematics, */
 /* >                  University of Manchester */
 /* > */
 /* > \endverbatim */

 /*  ===================================================================== */
 /* Subroutine */ int zhecon_3_(char *uplo, integer *n, doublecomplex *a, 
 	integer *lda, doublecomplex *e, integer *ipiv, doublereal *anorm, 
 	doublereal *rcond, doublecomplex *work, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;

    /* Local variables */
    extern /* Subroutine */ int zhetrs_3_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, integer *);
    integer kase, i__;
    extern logical lsame_(char *, char *);
    integer isave[3];
    logical upper;
    extern /* Subroutine */ int zlacn2_(integer *, doublecomplex *, 
 	    doublecomplex *, doublereal *, integer *, integer *), xerbla_(
 	    char *, integer *, ftnlen);
    doublereal ainvnm;


 /*  -- LAPACK computational routine (version 3.7.1) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --e;
    --ipiv;
    --work;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    } else if (*anorm < 0.) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHECON_3", &i__1, (ftnlen)8);
 	return 0;
    }

 /*     Quick return if possible */

    *rcond = 0.;
    if (*n == 0) {
 	*rcond = 1.;
 	return 0;
    } else if (*anorm <= 0.) {
 	return 0;
    }

 /*     Check that the diagonal matrix D is nonsingular. */

    if (upper) {

 /*        Upper triangular storage: examine D from bottom to top */

 	for (i__ = *n; i__ >= 1; --i__) {
 	    i__1 = i__ + i__ * a_dim1;
 	    if (ipiv[i__] > 0 && (a[i__1].r == 0. && a[i__1].i == 0.)) {
 		return 0;
 	    }
 	}
    } else {

 /*        Lower triangular storage: examine D from top to bottom. */

 	i__1 = *n;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = i__ + i__ * a_dim1;
 	    if (ipiv[i__] > 0 && (a[i__2].r == 0. && a[i__2].i == 0.)) {
 		return 0;
 	    }
 	}
    }

 /*     Estimate the 1-norm of the inverse. */

    kase = 0;
 L30:
    zlacn2_(n, &work[*n + 1], &work[1], &ainvnm, &kase, isave);
    if (kase != 0) {

 /*        Multiply by inv(L*D*L**H) or inv(U*D*U**H). */

 	zhetrs_3_(uplo, n, &c__1, &a[a_offset], lda, &e[1], &ipiv[1], &work[
 		1], n, info);
 	goto L30;
    }

 /*     Compute the estimate of the reciprocal condition number. */

    if (ainvnm != 0.) {
 	*rcond = 1. / ainvnm / *anorm;
    }

    return 0;

 /*     End of ZHECON_3 */

 } /* zhecon_3__ */

--- a/lapack-netlib/SRC/zhecon_rook.c
+++ b/lapack-netlib/SRC/zhecon_rook.c
@@ -0,0 +1,650 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief <b> ZHECON_ROOK estimates the reciprocal of the condition number fort HE matrices using factorizat
 ion obtained with one of the bounded diagonal pivoting methods (f2cmax 2 interchanges) </b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHECON_ROOK + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhecon_
 rook.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhecon_
 rook.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhecon_
 rook.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHECON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, */
 /*                               INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, N */
 /*       DOUBLE PRECISION   ANORM, RCOND */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX*16         A( LDA, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHECON_ROOK estimates the reciprocal of the condition number of a complex */
 /* > Hermitian matrix A using the factorization A = U*D*U**H or */
 /* > A = L*D*L**H computed by CHETRF_ROOK. */
 /* > */
 /* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */
 /* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          Specifies whether the details of the factorization are stored */
 /* >          as an upper or lower triangular matrix. */
 /* >          = 'U':  Upper triangular, form is A = U*D*U**H; */
 /* >          = 'L':  Lower triangular, form is A = L*D*L**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          The block diagonal matrix D and the multipliers used to */
 /* >          obtain the factor U or L as computed by CHETRF_ROOK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          Details of the interchanges and the block structure of D */
 /* >          as determined by CHETRF_ROOK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ANORM */
 /* > \verbatim */
 /* >          ANORM is DOUBLE PRECISION */
 /* >          The 1-norm of the original matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is DOUBLE PRECISION */
 /* >          The reciprocal of the condition number of the matrix A, */
 /* >          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */
 /* >          estimate of the 1-norm of inv(A) computed in this routine. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2017 */

 /* > \ingroup complex16HEcomputational */

 /* > \par Contributors: */
 /*  ================== */
 /* > \verbatim */
 /* > */
 /* >  June 2017,  Igor Kozachenko, */
 /* >                  Computer Science Division, */
 /* >                  University of California, Berkeley */
 /* > */
 /* >  September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */
 /* >                  School of Mathematics, */
 /* >                  University of Manchester */
 /* > */
 /* > \endverbatim */

 /*  ===================================================================== */
 /* Subroutine */ int zhecon_rook_(char *uplo, integer *n, doublecomplex *a, 
 	integer *lda, integer *ipiv, doublereal *anorm, doublereal *rcond, 
 	doublecomplex *work, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;

    /* Local variables */
    extern /* Subroutine */ int zhetrs_rook_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, integer *, doublecomplex *, integer *,
 	     integer *);
    integer kase, i__;
    extern logical lsame_(char *, char *);
    integer isave[3];
    logical upper;
    extern /* Subroutine */ int zlacn2_(integer *, doublecomplex *, 
 	    doublecomplex *, doublereal *, integer *, integer *), xerbla_(
 	    char *, integer *, ftnlen);
    doublereal ainvnm;


 /*  -- LAPACK computational routine (version 3.7.1) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    --work;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    } else if (*anorm < 0.) {
 	*info = -6;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHECON_ROOK", &i__1, (ftnlen)11);
 	return 0;
    }

 /*     Quick return if possible */

    *rcond = 0.;
    if (*n == 0) {
 	*rcond = 1.;
 	return 0;
    } else if (*anorm <= 0.) {
 	return 0;
    }

 /*     Check that the diagonal matrix D is nonsingular. */

    if (upper) {

 /*        Upper triangular storage: examine D from bottom to top */

 	for (i__ = *n; i__ >= 1; --i__) {
 	    i__1 = i__ + i__ * a_dim1;
 	    if (ipiv[i__] > 0 && (a[i__1].r == 0. && a[i__1].i == 0.)) {
 		return 0;
 	    }
 /* L10: */
 	}
    } else {

 /*        Lower triangular storage: examine D from top to bottom. */

 	i__1 = *n;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = i__ + i__ * a_dim1;
 	    if (ipiv[i__] > 0 && (a[i__2].r == 0. && a[i__2].i == 0.)) {
 		return 0;
 	    }
 /* L20: */
 	}
    }

 /*     Estimate the 1-norm of the inverse. */

    kase = 0;
 L30:
    zlacn2_(n, &work[*n + 1], &work[1], &ainvnm, &kase, isave);
    if (kase != 0) {

 /*        Multiply by inv(L*D*L**H) or inv(U*D*U**H). */

 	zhetrs_rook_(uplo, n, &c__1, &a[a_offset], lda, &ipiv[1], &work[1], 
 		n, info);
 	goto L30;
    }

 /*     Compute the estimate of the reciprocal condition number. */

    if (ainvnm != 0.) {
 	*rcond = 1. / ainvnm / *anorm;
    }

    return 0;

 /*     End of ZHECON_ROOK */

 } /* zhecon_rook__ */

--- a/lapack-netlib/SRC/zheequb.c
+++ b/lapack-netlib/SRC/zheequb.c
@@ -0,0 +1,874 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b ZHEEQUB */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHEEQUB + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheequb
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheequb
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheequb
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHEEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) */

 /*       INTEGER            INFO, LDA, N */
 /*       DOUBLE PRECISION   AMAX, SCOND */
 /*       CHARACTER          UPLO */
 /*       COMPLEX*16         A( LDA, * ), WORK( * ) */
 /*       DOUBLE PRECISION   S( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHEEQUB computes row and column scalings intended to equilibrate a */
 /* > Hermitian matrix A (with respect to the Euclidean norm) and reduce */
 /* > its condition number. The scale factors S are computed by the BIN */
 /* > algorithm (see references) so that the scaled matrix B with elements */
 /* > B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of */
 /* > the smallest possible condition number over all possible diagonal */
 /* > scalings. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          The N-by-N Hermitian matrix whose scaling factors are to be */
 /* >          computed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] S */
 /* > \verbatim */
 /* >          S is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0, S contains the scale factors for A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] SCOND */
 /* > \verbatim */
 /* >          SCOND is DOUBLE PRECISION */
 /* >          If INFO = 0, S contains the ratio of the smallest S(i) to */
 /* >          the largest S(i). If SCOND >= 0.1 and AMAX is neither too */
 /* >          large nor too small, it is not worth scaling by S. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] AMAX */
 /* > \verbatim */
 /* >          AMAX is DOUBLE PRECISION */
 /* >          Largest absolute value of any matrix element. If AMAX is */
 /* >          very close to overflow or very close to underflow, the */
 /* >          matrix should be scaled. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, the i-th diagonal element is nonpositive. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date April 2012 */

 /* > \ingroup complex16HEcomputational */

 /* > \par References: */
 /*  ================ */
 /* > */
 /* >  Livne, O.E. and Golub, G.H., "Scaling by Binormalization", \n */
 /* >  Numerical Algorithms, vol. 35, no. 1, pp. 97-120, January 2004. \n */
 /* >  DOI 10.1023/B:NUMA.0000016606.32820.69 \n */
 /* >  Tech report version: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.3.1679 */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zheequb_(char *uplo, integer *n, doublecomplex *a, 
 	integer *lda, doublereal *s, doublereal *scond, doublereal *amax, 
 	doublecomplex *work, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    doublereal d__1, d__2, d__3, d__4;
    doublecomplex z__1, z__2, z__3, z__4;

    /* Local variables */
    doublereal base;
    integer iter;
    doublereal smin, smax, d__;
    integer i__, j;
    doublereal t, u, scale;
    extern logical lsame_(char *, char *);
    doublereal c0, c1, c2, sumsq;
    extern doublereal dlamch_(char *);
    doublereal si;
    logical up;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    doublereal bignum, smlnum;
    extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *,
 	     doublereal *, doublereal *);
    doublereal avg, std, tol;


 /*  -- LAPACK computational routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     April 2012 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --s;
    --work;

    /* Function Body */
    *info = 0;
    if (! (lsame_(uplo, "U") || lsame_(uplo, "L"))) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHEEQUB", &i__1, (ftnlen)7);
 	return 0;
    }
    up = lsame_(uplo, "U");
    *amax = 0.;

 /*     Quick return if possible. */

    if (*n == 0) {
 	*scond = 1.;
 	return 0;
    }
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	s[i__] = 0.;
    }
    *amax = 0.;
    if (up) {
 	i__1 = *n;
 	for (j = 1; j <= i__1; ++j) {
 	    i__2 = j - 1;
 	    for (i__ = 1; i__ <= i__2; ++i__) {
 /* Computing MAX */
 		i__3 = i__ + j * a_dim1;
 		d__3 = s[i__], d__4 = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = 
 			d_imag(&a[i__ + j * a_dim1]), abs(d__2));
 		s[i__] = f2cmax(d__3,d__4);
 /* Computing MAX */
 		i__3 = i__ + j * a_dim1;
 		d__3 = s[j], d__4 = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = 
 			d_imag(&a[i__ + j * a_dim1]), abs(d__2));
 		s[j] = f2cmax(d__3,d__4);
 /* Computing MAX */
 		i__3 = i__ + j * a_dim1;
 		d__3 = *amax, d__4 = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = 
 			d_imag(&a[i__ + j * a_dim1]), abs(d__2));
 		*amax = f2cmax(d__3,d__4);
 	    }
 /* Computing MAX */
 	    i__2 = j + j * a_dim1;
 	    d__3 = s[j], d__4 = (d__1 = a[i__2].r, abs(d__1)) + (d__2 = 
 		    d_imag(&a[j + j * a_dim1]), abs(d__2));
 	    s[j] = f2cmax(d__3,d__4);
 /* Computing MAX */
 	    i__2 = j + j * a_dim1;
 	    d__3 = *amax, d__4 = (d__1 = a[i__2].r, abs(d__1)) + (d__2 = 
 		    d_imag(&a[j + j * a_dim1]), abs(d__2));
 	    *amax = f2cmax(d__3,d__4);
 	}
    } else {
 	i__1 = *n;
 	for (j = 1; j <= i__1; ++j) {
 /* Computing MAX */
 	    i__2 = j + j * a_dim1;
 	    d__3 = s[j], d__4 = (d__1 = a[i__2].r, abs(d__1)) + (d__2 = 
 		    d_imag(&a[j + j * a_dim1]), abs(d__2));
 	    s[j] = f2cmax(d__3,d__4);
 /* Computing MAX */
 	    i__2 = j + j * a_dim1;
 	    d__3 = *amax, d__4 = (d__1 = a[i__2].r, abs(d__1)) + (d__2 = 
 		    d_imag(&a[j + j * a_dim1]), abs(d__2));
 	    *amax = f2cmax(d__3,d__4);
 	    i__2 = *n;
 	    for (i__ = j + 1; i__ <= i__2; ++i__) {
 /* Computing MAX */
 		i__3 = i__ + j * a_dim1;
 		d__3 = s[i__], d__4 = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = 
 			d_imag(&a[i__ + j * a_dim1]), abs(d__2));
 		s[i__] = f2cmax(d__3,d__4);
 /* Computing MAX */
 		i__3 = i__ + j * a_dim1;
 		d__3 = s[j], d__4 = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = 
 			d_imag(&a[i__ + j * a_dim1]), abs(d__2));
 		s[j] = f2cmax(d__3,d__4);
 /* Computing MAX */
 		i__3 = i__ + j * a_dim1;
 		d__3 = *amax, d__4 = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = 
 			d_imag(&a[i__ + j * a_dim1]), abs(d__2));
 		*amax = f2cmax(d__3,d__4);
 	    }
 	}
    }
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 	s[j] = 1. / s[j];
    }
    tol = 1. / sqrt(*n * 2.);
    for (iter = 1; iter <= 100; ++iter) {
 	scale = 0.;
 	sumsq = 0.;
 /*        beta = |A|s */
 	i__1 = *n;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = i__;
 	    work[i__2].r = 0., work[i__2].i = 0.;
 	}
 	if (up) {
 	    i__1 = *n;
 	    for (j = 1; j <= i__1; ++j) {
 		i__2 = j - 1;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    i__3 = i__;
 		    i__4 = i__;
 		    i__5 = i__ + j * a_dim1;
 		    d__3 = ((d__1 = a[i__5].r, abs(d__1)) + (d__2 = d_imag(&a[
 			    i__ + j * a_dim1]), abs(d__2))) * s[j];
 		    z__1.r = work[i__4].r + d__3, z__1.i = work[i__4].i;
 		    work[i__3].r = z__1.r, work[i__3].i = z__1.i;
 		    i__3 = j;
 		    i__4 = j;
 		    i__5 = i__ + j * a_dim1;
 		    d__3 = ((d__1 = a[i__5].r, abs(d__1)) + (d__2 = d_imag(&a[
 			    i__ + j * a_dim1]), abs(d__2))) * s[i__];
 		    z__1.r = work[i__4].r + d__3, z__1.i = work[i__4].i;
 		    work[i__3].r = z__1.r, work[i__3].i = z__1.i;
 		}
 		i__2 = j;
 		i__3 = j;
 		i__4 = j + j * a_dim1;
 		d__3 = ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[j + 
 			j * a_dim1]), abs(d__2))) * s[j];
 		z__1.r = work[i__3].r + d__3, z__1.i = work[i__3].i;
 		work[i__2].r = z__1.r, work[i__2].i = z__1.i;
 	    }
 	} else {
 	    i__1 = *n;
 	    for (j = 1; j <= i__1; ++j) {
 		i__2 = j;
 		i__3 = j;
 		i__4 = j + j * a_dim1;
 		d__3 = ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[j + 
 			j * a_dim1]), abs(d__2))) * s[j];
 		z__1.r = work[i__3].r + d__3, z__1.i = work[i__3].i;
 		work[i__2].r = z__1.r, work[i__2].i = z__1.i;
 		i__2 = *n;
 		for (i__ = j + 1; i__ <= i__2; ++i__) {
 		    i__3 = i__;
 		    i__4 = i__;
 		    i__5 = i__ + j * a_dim1;
 		    d__3 = ((d__1 = a[i__5].r, abs(d__1)) + (d__2 = d_imag(&a[
 			    i__ + j * a_dim1]), abs(d__2))) * s[j];
 		    z__1.r = work[i__4].r + d__3, z__1.i = work[i__4].i;
 		    work[i__3].r = z__1.r, work[i__3].i = z__1.i;
 		    i__3 = j;
 		    i__4 = j;
 		    i__5 = i__ + j * a_dim1;
 		    d__3 = ((d__1 = a[i__5].r, abs(d__1)) + (d__2 = d_imag(&a[
 			    i__ + j * a_dim1]), abs(d__2))) * s[i__];
 		    z__1.r = work[i__4].r + d__3, z__1.i = work[i__4].i;
 		    work[i__3].r = z__1.r, work[i__3].i = z__1.i;
 		}
 	    }
 	}
 /*        avg = s^T beta / n */
 	avg = 0.;
 	i__1 = *n;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = i__;
 	    i__3 = i__;
 	    z__2.r = s[i__2] * work[i__3].r, z__2.i = s[i__2] * work[i__3].i;
 	    z__1.r = avg + z__2.r, z__1.i = z__2.i;
 	    avg = z__1.r;
 	}
 	avg /= *n;
 	std = 0.;
 	i__1 = *n << 1;
 	for (i__ = *n + 1; i__ <= i__1; ++i__) {
 	    i__2 = i__;
 	    i__3 = i__ - *n;
 	    i__4 = i__ - *n;
 	    z__2.r = s[i__3] * work[i__4].r, z__2.i = s[i__3] * work[i__4].i;
 	    z__1.r = z__2.r - avg, z__1.i = z__2.i;
 	    work[i__2].r = z__1.r, work[i__2].i = z__1.i;
 	}
 	zlassq_(n, &work[*n + 1], &c__1, &scale, &sumsq);
 	std = scale * sqrt(sumsq / *n);
 	if (std < tol * avg) {
 	    goto L999;
 	}
 	i__1 = *n;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = i__ + i__ * a_dim1;
 	    t = (d__1 = a[i__2].r, abs(d__1)) + (d__2 = d_imag(&a[i__ + i__ * 
 		    a_dim1]), abs(d__2));
 	    si = s[i__];
 	    c2 = (*n - 1) * t;
 	    i__2 = *n - 2;
 	    i__3 = i__;
 	    d__1 = t * si;
 	    z__2.r = work[i__3].r - d__1, z__2.i = work[i__3].i;
 	    d__2 = (doublereal) i__2;
 	    z__1.r = d__2 * z__2.r, z__1.i = d__2 * z__2.i;
 	    c1 = z__1.r;
 	    d__1 = -(t * si) * si;
 	    i__2 = i__;
 	    d__2 = 2.;
 	    z__4.r = d__2 * work[i__2].r, z__4.i = d__2 * work[i__2].i;
 	    z__3.r = si * z__4.r, z__3.i = si * z__4.i;
 	    z__2.r = d__1 + z__3.r, z__2.i = z__3.i;
 	    d__3 = *n * avg;
 	    z__1.r = z__2.r - d__3, z__1.i = z__2.i;
 	    c0 = z__1.r;
 	    d__ = c1 * c1 - c0 * 4 * c2;
 	    if (d__ <= 0.) {
 		*info = -1;
 		return 0;
 	    }
 	    si = c0 * -2 / (c1 + sqrt(d__));
 	    d__ = si - s[i__];
 	    u = 0.;
 	    if (up) {
 		i__2 = i__;
 		for (j = 1; j <= i__2; ++j) {
 		    i__3 = j + i__ * a_dim1;
 		    t = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[j + 
 			    i__ * a_dim1]), abs(d__2));
 		    u += s[j] * t;
 		    i__3 = j;
 		    i__4 = j;
 		    d__1 = d__ * t;
 		    z__1.r = work[i__4].r + d__1, z__1.i = work[i__4].i;
 		    work[i__3].r = z__1.r, work[i__3].i = z__1.i;
 		}
 		i__2 = *n;
 		for (j = i__ + 1; j <= i__2; ++j) {
 		    i__3 = i__ + j * a_dim1;
 		    t = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[i__ 
 			    + j * a_dim1]), abs(d__2));
 		    u += s[j] * t;
 		    i__3 = j;
 		    i__4 = j;
 		    d__1 = d__ * t;
 		    z__1.r = work[i__4].r + d__1, z__1.i = work[i__4].i;
 		    work[i__3].r = z__1.r, work[i__3].i = z__1.i;
 		}
 	    } else {
 		i__2 = i__;
 		for (j = 1; j <= i__2; ++j) {
 		    i__3 = i__ + j * a_dim1;
 		    t = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[i__ 
 			    + j * a_dim1]), abs(d__2));
 		    u += s[j] * t;
 		    i__3 = j;
 		    i__4 = j;
 		    d__1 = d__ * t;
 		    z__1.r = work[i__4].r + d__1, z__1.i = work[i__4].i;
 		    work[i__3].r = z__1.r, work[i__3].i = z__1.i;
 		}
 		i__2 = *n;
 		for (j = i__ + 1; j <= i__2; ++j) {
 		    i__3 = j + i__ * a_dim1;
 		    t = (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[j + 
 			    i__ * a_dim1]), abs(d__2));
 		    u += s[j] * t;
 		    i__3 = j;
 		    i__4 = j;
 		    d__1 = d__ * t;
 		    z__1.r = work[i__4].r + d__1, z__1.i = work[i__4].i;
 		    work[i__3].r = z__1.r, work[i__3].i = z__1.i;
 		}
 	    }
 	    i__2 = i__;
 	    z__4.r = u + work[i__2].r, z__4.i = work[i__2].i;
 	    z__3.r = d__ * z__4.r, z__3.i = d__ * z__4.i;
 	    d__1 = (doublereal) (*n);
 	    z__2.r = z__3.r / d__1, z__2.i = z__3.i / d__1;
 	    z__1.r = avg + z__2.r, z__1.i = z__2.i;
 	    avg = z__1.r;
 	    s[i__] = si;
 	}
    }
 L999:
    smlnum = dlamch_("SAFEMIN");
    bignum = 1. / smlnum;
    smin = bignum;
    smax = 0.;
    t = 1. / sqrt(avg);
    base = dlamch_("B");
    u = 1. / log(base);
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	i__2 = (integer) (u * log(s[i__] * t));
 	s[i__] = pow_di(&base, &i__2);
 /* Computing MIN */
 	d__1 = smin, d__2 = s[i__];
 	smin = f2cmin(d__1,d__2);
 /* Computing MAX */
 	d__1 = smax, d__2 = s[i__];
 	smax = f2cmax(d__1,d__2);
    }
    *scond = f2cmax(smin,smlnum) / f2cmin(smax,bignum);

    return 0;
 } /* zheequb_ */

--- a/lapack-netlib/SRC/zheev.c
+++ b/lapack-netlib/SRC/zheev.c
@@ -0,0 +1,726 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__0 = 0;
 static doublereal c_b18 = 1.;

 /* > \brief <b> ZHEEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matr
 ices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHEEV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheev.f
 "> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheev.f
 "> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheev.f
 "> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, */
 /*                         INFO ) */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, LDA, LWORK, N */
 /*       DOUBLE PRECISION   RWORK( * ), W( * ) */
 /*       COMPLEX*16         A( LDA, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHEEV computes all eigenvalues and, optionally, eigenvectors of a */
 /* > complex Hermitian matrix A. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA, N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of A contains the */
 /* >          upper triangular part of the matrix A.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of A contains */
 /* >          the lower triangular part of the matrix A. */
 /* >          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
 /* >          orthonormal eigenvectors of the matrix A. */
 /* >          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */
 /* >          or the upper triangle (if UPLO='U') of A, including the */
 /* >          diagonal, is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK.  LWORK >= f2cmax(1,2*N-1). */
 /* >          For optimal efficiency, LWORK >= (NB+1)*N, */
 /* >          where NB is the blocksize for ZHETRD returned by ILAENV. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, dimension (f2cmax(1, 3*N-2)) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, the algorithm failed to converge; i */
 /* >                off-diagonal elements of an intermediate tridiagonal */
 /* >                form did not converge to zero. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16HEeigen */

 /*  ===================================================================== */
 /* Subroutine */ int zheev_(char *jobz, char *uplo, integer *n, doublecomplex 
 	*a, integer *lda, doublereal *w, doublecomplex *work, integer *lwork, 
 	doublereal *rwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    doublereal d__1;

    /* Local variables */
    integer inde;
    doublereal anrm;
    integer imax;
    doublereal rmin, rmax;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
 	    integer *);
    doublereal sigma;
    extern logical lsame_(char *, char *);
    integer iinfo;
    logical lower, wantz;
    integer nb;
    extern doublereal dlamch_(char *);
    integer iscale;
    doublereal safmin;
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    doublereal bignum;
    extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, 
 	    integer *, doublereal *);
    integer indtau;
    extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *,
 	     integer *), zlascl_(char *, integer *, integer *, doublereal *, 
 	    doublereal *, integer *, integer *, doublecomplex *, integer *, 
 	    integer *);
    integer indwrk;
    extern /* Subroutine */ int zhetrd_(char *, integer *, doublecomplex *, 
 	    integer *, doublereal *, doublereal *, doublecomplex *, 
 	    doublecomplex *, integer *, integer *);
    integer llwork;
    doublereal smlnum;
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, 
 	    doublereal *, doublecomplex *, integer *, doublereal *, integer *), zungtr_(char *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, integer *);
    doublereal eps;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --w;
    --work;
    --rwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    lower = lsame_(uplo, "L");
    lquery = *lwork == -1;

    *info = 0;
    if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -1;
    } else if (! (lower || lsame_(uplo, "U"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    }

    if (*info == 0) {
 	nb = ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
 		 (ftnlen)1);
 /* Computing MAX */
 	i__1 = 1, i__2 = (nb + 1) * *n;
 	lwkopt = f2cmax(i__1,i__2);
 	work[1].r = (doublereal) lwkopt, work[1].i = 0.;

 /* Computing MAX */
 	i__1 = 1, i__2 = (*n << 1) - 1;
 	if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
 	    *info = -8;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHEEV ", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

    if (*n == 1) {
 	i__1 = a_dim1 + 1;
 	w[1] = a[i__1].r;
 	work[1].r = 1., work[1].i = 0.;
 	if (wantz) {
 	    i__1 = a_dim1 + 1;
 	    a[i__1].r = 1., a[i__1].i = 0.;
 	}
 	return 0;
    }

 /*     Get machine constants. */

    safmin = dlamch_("Safe minimum");
    eps = dlamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1. / smlnum;
    rmin = sqrt(smlnum);
    rmax = sqrt(bignum);

 /*     Scale matrix to allowable range, if necessary. */

    anrm = zlanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]);
    iscale = 0;
    if (anrm > 0. && anrm < rmin) {
 	iscale = 1;
 	sigma = rmin / anrm;
    } else if (anrm > rmax) {
 	iscale = 1;
 	sigma = rmax / anrm;
    }
    if (iscale == 1) {
 	zlascl_(uplo, &c__0, &c__0, &c_b18, &sigma, n, n, &a[a_offset], lda, 
 		info);
    }

 /*     Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */

    inde = 1;
    indtau = 1;
    indwrk = indtau + *n;
    llwork = *lwork - indwrk + 1;
    zhetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], &
 	    work[indwrk], &llwork, &iinfo);

 /*     For eigenvalues only, call DSTERF.  For eigenvectors, first call */
 /*     ZUNGTR to generate the unitary matrix, then call ZSTEQR. */

    if (! wantz) {
 	dsterf_(n, &w[1], &rwork[inde], info);
    } else {
 	zungtr_(uplo, n, &a[a_offset], lda, &work[indtau], &work[indwrk], &
 		llwork, &iinfo);
 	indwrk = inde + *n;
 	zsteqr_(jobz, n, &w[1], &rwork[inde], &a[a_offset], lda, &rwork[
 		indwrk], info);
    }

 /*     If matrix was scaled, then rescale eigenvalues appropriately. */

    if (iscale == 1) {
 	if (*info == 0) {
 	    imax = *n;
 	} else {
 	    imax = *info - 1;
 	}
 	d__1 = 1. / sigma;
 	dscal_(&imax, &d__1, &w[1], &c__1);
    }

 /*     Set WORK(1) to optimal complex workspace size. */

    work[1].r = (doublereal) lwkopt, work[1].i = 0.;

    return 0;

 /*     End of ZHEEV */

 } /* zheev_ */

--- a/lapack-netlib/SRC/zheev_2stage.c
+++ b/lapack-netlib/SRC/zheev_2stage.c
@@ -0,0 +1,785 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__2 = 2;
 static integer c__3 = 3;
 static integer c__4 = 4;
 static integer c__0 = 0;
 static doublereal c_b28 = 1.;

 /* > \brief <b> ZHEEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for 
 HE matrices</b> */

 /*  @precisions fortran z -> s d c */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHEEV_2STAGE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheev_2
 stage.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheev_2
 stage.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheev_2
 stage.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, */
 /*                                RWORK, INFO ) */

 /*       IMPLICIT NONE */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, LDA, LWORK, N */
 /*       DOUBLE PRECISION   RWORK( * ), W( * ) */
 /*       COMPLEX*16         A( LDA, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHEEV_2STAGE computes all eigenvalues and, optionally, eigenvectors of a */
 /* > complex Hermitian matrix A using the 2stage technique for */
 /* > the reduction to tridiagonal. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* >                  Not available in this release. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA, N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of A contains the */
 /* >          upper triangular part of the matrix A.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of A contains */
 /* >          the lower triangular part of the matrix A. */
 /* >          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
 /* >          orthonormal eigenvectors of the matrix A. */
 /* >          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */
 /* >          or the upper triangle (if UPLO='U') of A, including the */
 /* >          diagonal, is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK. LWORK >= 1, when N <= 1; */
 /* >          otherwise */
 /* >          If JOBZ = 'N' and N > 1, LWORK must be queried. */
 /* >                                   LWORK = MAX(1, dimension) where */
 /* >                                   dimension = f2cmax(stage1,stage2) + (KD+1)*N + N */
 /* >                                             = N*KD + N*f2cmax(KD+1,FACTOPTNB) */
 /* >                                               + f2cmax(2*KD*KD, KD*NTHREADS) */
 /* >                                               + (KD+1)*N + N */
 /* >                                   where KD is the blocking size of the reduction, */
 /* >                                   FACTOPTNB is the blocking used by the QR or LQ */
 /* >                                   algorithm, usually FACTOPTNB=128 is a good choice */
 /* >                                   NTHREADS is the number of threads used when */
 /* >                                   openMP compilation is enabled, otherwise =1. */
 /* >          If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, dimension (f2cmax(1, 3*N-2)) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, the algorithm failed to converge; i */
 /* >                off-diagonal elements of an intermediate tridiagonal */
 /* >                form did not converge to zero. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complex16HEeigen */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  All details about the 2stage techniques are available in: */
 /* > */
 /* >  Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */
 /* >  Parallel reduction to condensed forms for symmetric eigenvalue problems */
 /* >  using aggregated fine-grained and memory-aware kernels. In Proceedings */
 /* >  of 2011 International Conference for High Performance Computing, */
 /* >  Networking, Storage and Analysis (SC '11), New York, NY, USA, */
 /* >  Article 8 , 11 pages. */
 /* >  http://doi.acm.org/10.1145/2063384.2063394 */
 /* > */
 /* >  A. Haidar, J. Kurzak, P. Luszczek, 2013. */
 /* >  An improved parallel singular value algorithm and its implementation */
 /* >  for multicore hardware, In Proceedings of 2013 International Conference */
 /* >  for High Performance Computing, Networking, Storage and Analysis (SC '13). */
 /* >  Denver, Colorado, USA, 2013. */
 /* >  Article 90, 12 pages. */
 /* >  http://doi.acm.org/10.1145/2503210.2503292 */
 /* > */
 /* >  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */
 /* >  A novel hybrid CPU-GPU generalized eigensolver for electronic structure */
 /* >  calculations based on fine-grained memory aware tasks. */
 /* >  International Journal of High Performance Computing Applications. */
 /* >  Volume 28 Issue 2, Pages 196-209, May 2014. */
 /* >  http://hpc.sagepub.com/content/28/2/196 */
 /* > */
 /* > \endverbatim */

 /*  ===================================================================== */
 /* Subroutine */ int zheev_2stage_(char *jobz, char *uplo, integer *n, 
 	doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work, 
 	integer *lwork, doublereal *rwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1;
    doublereal d__1;

    /* Local variables */
    integer inde;
    extern integer ilaenv2stage_(integer *, char *, char *, integer *, 
 	    integer *, integer *, integer *);
    doublereal anrm;
    integer imax;
    doublereal rmin, rmax;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
 	    integer *);
    doublereal sigma;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int zhetrd_2stage_(char *, char *, integer *, 
 	    doublecomplex *, integer *, doublereal *, doublereal *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, integer *);
    integer iinfo, lhtrd, lwmin;
    logical lower;
    integer lwtrd;
    logical wantz;
    integer ib, kd;
    extern doublereal dlamch_(char *);
    integer iscale;
    doublereal safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    doublereal bignum;
    extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, 
 	    integer *, doublereal *);
    integer indtau;
    extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *,
 	     integer *), zlascl_(char *, integer *, integer *, doublereal *, 
 	    doublereal *, integer *, integer *, doublecomplex *, integer *, 
 	    integer *);
    integer indwrk, llwork;
    doublereal smlnum;
    logical lquery;
    extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, 
 	    doublereal *, doublecomplex *, integer *, doublereal *, integer *), zungtr_(char *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, integer *);
    doublereal eps;
    integer indhous;



 /*  -- LAPACK driver routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --w;
    --work;
    --rwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    lower = lsame_(uplo, "L");
    lquery = *lwork == -1;

    *info = 0;
    if (! lsame_(jobz, "N")) {
 	*info = -1;
    } else if (! (lower || lsame_(uplo, "U"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    }

    if (*info == 0) {
 	kd = ilaenv2stage_(&c__1, "ZHETRD_2STAGE", jobz, n, &c_n1, &c_n1, &
 		c_n1);
 	ib = ilaenv2stage_(&c__2, "ZHETRD_2STAGE", jobz, n, &kd, &c_n1, &c_n1);
 	lhtrd = ilaenv2stage_(&c__3, "ZHETRD_2STAGE", jobz, n, &kd, &ib, &
 		c_n1);
 	lwtrd = ilaenv2stage_(&c__4, "ZHETRD_2STAGE", jobz, n, &kd, &ib, &
 		c_n1);
 	lwmin = *n + lhtrd + lwtrd;
 	work[1].r = (doublereal) lwmin, work[1].i = 0.;

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -8;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHEEV_2STAGE ", &i__1, (ftnlen)13);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

    if (*n == 1) {
 	i__1 = a_dim1 + 1;
 	w[1] = a[i__1].r;
 	work[1].r = 1., work[1].i = 0.;
 	if (wantz) {
 	    i__1 = a_dim1 + 1;
 	    a[i__1].r = 1., a[i__1].i = 0.;
 	}
 	return 0;
    }

 /*     Get machine constants. */

    safmin = dlamch_("Safe minimum");
    eps = dlamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1. / smlnum;
    rmin = sqrt(smlnum);
    rmax = sqrt(bignum);

 /*     Scale matrix to allowable range, if necessary. */

    anrm = zlanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]);
    iscale = 0;
    if (anrm > 0. && anrm < rmin) {
 	iscale = 1;
 	sigma = rmin / anrm;
    } else if (anrm > rmax) {
 	iscale = 1;
 	sigma = rmax / anrm;
    }
    if (iscale == 1) {
 	zlascl_(uplo, &c__0, &c__0, &c_b28, &sigma, n, n, &a[a_offset], lda, 
 		info);
    }

 /*     Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. */

    inde = 1;
    indtau = 1;
    indhous = indtau + *n;
    indwrk = indhous + lhtrd;
    llwork = *lwork - indwrk + 1;

    zhetrd_2stage_(jobz, uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &
 	    work[indtau], &work[indhous], &lhtrd, &work[indwrk], &llwork, &
 	    iinfo);

 /*     For eigenvalues only, call DSTERF.  For eigenvectors, first call */
 /*     ZUNGTR to generate the unitary matrix, then call ZSTEQR. */

    if (! wantz) {
 	dsterf_(n, &w[1], &rwork[inde], info);
    } else {
 	zungtr_(uplo, n, &a[a_offset], lda, &work[indtau], &work[indwrk], &
 		llwork, &iinfo);
 	indwrk = inde + *n;
 	zsteqr_(jobz, n, &w[1], &rwork[inde], &a[a_offset], lda, &rwork[
 		indwrk], info);
    }

 /*     If matrix was scaled, then rescale eigenvalues appropriately. */

    if (iscale == 1) {
 	if (*info == 0) {
 	    imax = *n;
 	} else {
 	    imax = *info - 1;
 	}
 	d__1 = 1. / sigma;
 	dscal_(&imax, &d__1, &w[1], &c__1);
    }

 /*     Set WORK(1) to optimal complex workspace size. */

    work[1].r = (doublereal) lwmin, work[1].i = 0.;

    return 0;

 /*     End of ZHEEV_2STAGE */

 } /* zheev_2stage__ */

--- a/lapack-netlib/SRC/zheevd.c
+++ b/lapack-netlib/SRC/zheevd.c
@@ -0,0 +1,831 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__0 = 0;
 static doublereal c_b18 = 1.;

 /* > \brief <b> ZHEEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE mat
 rices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHEEVD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheevd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheevd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheevd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, */
 /*                          LRWORK, IWORK, LIWORK, INFO ) */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, LDA, LIWORK, LRWORK, LWORK, N */
 /*       INTEGER            IWORK( * ) */
 /*       DOUBLE PRECISION   RWORK( * ), W( * ) */
 /*       COMPLEX*16         A( LDA, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHEEVD computes all eigenvalues and, optionally, eigenvectors of a */
 /* > complex Hermitian matrix A.  If eigenvectors are desired, it uses a */
 /* > divide and conquer algorithm. */
 /* > */
 /* > The divide and conquer algorithm makes very mild assumptions about */
 /* > floating point arithmetic. It will work on machines with a guard */
 /* > digit in add/subtract, or on those binary machines without guard */
 /* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
 /* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */
 /* > without guard digits, but we know of none. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA, N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of A contains the */
 /* >          upper triangular part of the matrix A.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of A contains */
 /* >          the lower triangular part of the matrix A. */
 /* >          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
 /* >          orthonormal eigenvectors of the matrix A. */
 /* >          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */
 /* >          or the upper triangle (if UPLO='U') of A, including the */
 /* >          diagonal, is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK. */
 /* >          If N <= 1,                LWORK must be at least 1. */
 /* >          If JOBZ  = 'N' and N > 1, LWORK must be at least N + 1. */
 /* >          If JOBZ  = 'V' and N > 1, LWORK must be at least 2*N + N**2. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal sizes of the WORK, RWORK and */
 /* >          IWORK arrays, returns these values as the first entries of */
 /* >          the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, */
 /* >                                         dimension (LRWORK) */
 /* >          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LRWORK */
 /* > \verbatim */
 /* >          LRWORK is INTEGER */
 /* >          The dimension of the array RWORK. */
 /* >          If N <= 1,                LRWORK must be at least 1. */
 /* >          If JOBZ  = 'N' and N > 1, LRWORK must be at least N. */
 /* >          If JOBZ  = 'V' and N > 1, LRWORK must be at least */
 /* >                         1 + 5*N + 2*N**2. */
 /* > */
 /* >          If LRWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */
 /* >          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LIWORK */
 /* > \verbatim */
 /* >          LIWORK is INTEGER */
 /* >          The dimension of the array IWORK. */
 /* >          If N <= 1,                LIWORK must be at least 1. */
 /* >          If JOBZ  = 'N' and N > 1, LIWORK must be at least 1. */
 /* >          If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N. */
 /* > */
 /* >          If LIWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i and JOBZ = 'N', then the algorithm failed */
 /* >                to converge; i off-diagonal elements of an intermediate */
 /* >                tridiagonal form did not converge to zero; */
 /* >                if INFO = i and JOBZ = 'V', then the algorithm failed */
 /* >                to compute an eigenvalue while working on the submatrix */
 /* >                lying in rows and columns INFO/(N+1) through */
 /* >                mod(INFO,N+1). */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16HEeigen */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* >  Modified description of INFO. Sven, 16 Feb 05. */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* > Jeff Rutter, Computer Science Division, University of California */
 /* > at Berkeley, USA */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zheevd_(char *jobz, char *uplo, integer *n, 
 	doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work, 
 	integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, 
 	integer *liwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    doublereal d__1;

    /* Local variables */
    integer inde;
    doublereal anrm;
    integer imax;
    doublereal rmin, rmax;
    integer lopt;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
 	    integer *);
    doublereal sigma;
    extern logical lsame_(char *, char *);
    integer iinfo, lwmin, liopt;
    logical lower;
    integer llrwk, lropt;
    logical wantz;
    integer indwk2, llwrk2;
    extern doublereal dlamch_(char *);
    integer iscale;
    doublereal safmin;
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    doublereal bignum;
    extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, 
 	    integer *, doublereal *);
    integer indtau;
    extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *,
 	     integer *), zlascl_(char *, integer *, integer *, doublereal *, 
 	    doublereal *, integer *, integer *, doublecomplex *, integer *, 
 	    integer *), zstedc_(char *, integer *, doublereal *, 
 	    doublereal *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, doublereal *, integer *, integer *, integer *, integer 
 	    *);
    integer indrwk, indwrk, liwmin;
    extern /* Subroutine */ int zhetrd_(char *, integer *, doublecomplex *, 
 	    integer *, doublereal *, doublereal *, doublecomplex *, 
 	    doublecomplex *, integer *, integer *), zlacpy_(char *, 
 	    integer *, integer *, doublecomplex *, integer *, doublecomplex *,
 	     integer *);
    integer lrwmin, llwork;
    doublereal smlnum;
    logical lquery;
    extern /* Subroutine */ int zunmtr_(char *, char *, char *, integer *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, integer *);
    doublereal eps;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --w;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    lower = lsame_(uplo, "L");
    lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1;

    *info = 0;
    if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -1;
    } else if (! (lower || lsame_(uplo, "U"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    }

    if (*info == 0) {
 	if (*n <= 1) {
 	    lwmin = 1;
 	    lrwmin = 1;
 	    liwmin = 1;
 	    lopt = lwmin;
 	    lropt = lrwmin;
 	    liopt = liwmin;
 	} else {
 	    if (wantz) {
 		lwmin = (*n << 1) + *n * *n;
 /* Computing 2nd power */
 		i__1 = *n;
 		lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1);
 		liwmin = *n * 5 + 3;
 	    } else {
 		lwmin = *n + 1;
 		lrwmin = *n;
 		liwmin = 1;
 	    }
 /* Computing MAX */
 	    i__1 = lwmin, i__2 = *n + ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1,
 		     &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
 	    lopt = f2cmax(i__1,i__2);
 	    lropt = lrwmin;
 	    liopt = liwmin;
 	}
 	work[1].r = (doublereal) lopt, work[1].i = 0.;
 	rwork[1] = (doublereal) lropt;
 	iwork[1] = liopt;

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -8;
 	} else if (*lrwork < lrwmin && ! lquery) {
 	    *info = -10;
 	} else if (*liwork < liwmin && ! lquery) {
 	    *info = -12;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHEEVD", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

    if (*n == 1) {
 	i__1 = a_dim1 + 1;
 	w[1] = a[i__1].r;
 	if (wantz) {
 	    i__1 = a_dim1 + 1;
 	    a[i__1].r = 1., a[i__1].i = 0.;
 	}
 	return 0;
    }

 /*     Get machine constants. */

    safmin = dlamch_("Safe minimum");
    eps = dlamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1. / smlnum;
    rmin = sqrt(smlnum);
    rmax = sqrt(bignum);

 /*     Scale matrix to allowable range, if necessary. */

    anrm = zlanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]);
    iscale = 0;
    if (anrm > 0. && anrm < rmin) {
 	iscale = 1;
 	sigma = rmin / anrm;
    } else if (anrm > rmax) {
 	iscale = 1;
 	sigma = rmax / anrm;
    }
    if (iscale == 1) {
 	zlascl_(uplo, &c__0, &c__0, &c_b18, &sigma, n, n, &a[a_offset], lda, 
 		info);
    }

 /*     Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */

    inde = 1;
    indtau = 1;
    indwrk = indtau + *n;
    indrwk = inde + *n;
    indwk2 = indwrk + *n * *n;
    llwork = *lwork - indwrk + 1;
    llwrk2 = *lwork - indwk2 + 1;
    llrwk = *lrwork - indrwk + 1;
    zhetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], &
 	    work[indwrk], &llwork, &iinfo);

 /*     For eigenvalues only, call DSTERF.  For eigenvectors, first call */
 /*     ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the */
 /*     tridiagonal matrix, then call ZUNMTR to multiply it to the */
 /*     Householder transformations represented as Householder vectors in */
 /*     A. */

    if (! wantz) {
 	dsterf_(n, &w[1], &rwork[inde], info);
    } else {
 	zstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], 
 		&llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info);
 	zunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[
 		indwrk], n, &work[indwk2], &llwrk2, &iinfo);
 	zlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda);
    }

 /*     If matrix was scaled, then rescale eigenvalues appropriately. */

    if (iscale == 1) {
 	if (*info == 0) {
 	    imax = *n;
 	} else {
 	    imax = *info - 1;
 	}
 	d__1 = 1. / sigma;
 	dscal_(&imax, &d__1, &w[1], &c__1);
    }

    work[1].r = (doublereal) lopt, work[1].i = 0.;
    rwork[1] = (doublereal) lropt;
    iwork[1] = liopt;

    return 0;

 /*     End of ZHEEVD */

 } /* zheevd_ */

--- a/lapack-netlib/SRC/zheevd_2stage.c
+++ b/lapack-netlib/SRC/zheevd_2stage.c
@@ -0,0 +1,886 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__2 = 2;
 static integer c__3 = 3;
 static integer c__4 = 4;
 static integer c__0 = 0;
 static doublereal c_b28 = 1.;

 /* > \brief <b> ZHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for
 HE matrices</b> */

 /*  @precisions fortran z -> s d c */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHEEVD_2STAGE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheevd_
 2stage.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheevd_
 2stage.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheevd_
 2stage.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, */
 /*                          RWORK, LRWORK, IWORK, LIWORK, INFO ) */

 /*       IMPLICIT NONE */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, LDA, LIWORK, LRWORK, LWORK, N */
 /*       INTEGER            IWORK( * ) */
 /*       DOUBLE PRECISION   RWORK( * ), W( * ) */
 /*       COMPLEX*16         A( LDA, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHEEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a */
 /* > complex Hermitian matrix A using the 2stage technique for */
 /* > the reduction to tridiagonal.  If eigenvectors are desired, it uses a */
 /* > divide and conquer algorithm. */
 /* > */
 /* > The divide and conquer algorithm makes very mild assumptions about */
 /* > floating point arithmetic. It will work on machines with a guard */
 /* > digit in add/subtract, or on those binary machines without guard */
 /* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
 /* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */
 /* > without guard digits, but we know of none. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* >                  Not available in this release. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA, N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of A contains the */
 /* >          upper triangular part of the matrix A.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of A contains */
 /* >          the lower triangular part of the matrix A. */
 /* >          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
 /* >          orthonormal eigenvectors of the matrix A. */
 /* >          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */
 /* >          or the upper triangle (if UPLO='U') of A, including the */
 /* >          diagonal, is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If N <= 1,               LWORK must be at least 1. */
 /* >          If JOBZ = 'N' and N > 1, LWORK must be queried. */
 /* >                                   LWORK = MAX(1, dimension) where */
 /* >                                   dimension = f2cmax(stage1,stage2) + (KD+1)*N + N+1 */
 /* >                                             = N*KD + N*f2cmax(KD+1,FACTOPTNB) */
 /* >                                               + f2cmax(2*KD*KD, KD*NTHREADS) */
 /* >                                               + (KD+1)*N + N+1 */
 /* >                                   where KD is the blocking size of the reduction, */
 /* >                                   FACTOPTNB is the blocking used by the QR or LQ */
 /* >                                   algorithm, usually FACTOPTNB=128 is a good choice */
 /* >                                   NTHREADS is the number of threads used when */
 /* >                                   openMP compilation is enabled, otherwise =1. */
 /* >          If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2 */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal sizes of the WORK, RWORK and */
 /* >          IWORK arrays, returns these values as the first entries of */
 /* >          the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, */
 /* >                                         dimension (LRWORK) */
 /* >          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LRWORK */
 /* > \verbatim */
 /* >          LRWORK is INTEGER */
 /* >          The dimension of the array RWORK. */
 /* >          If N <= 1,                LRWORK must be at least 1. */
 /* >          If JOBZ  = 'N' and N > 1, LRWORK must be at least N. */
 /* >          If JOBZ  = 'V' and N > 1, LRWORK must be at least */
 /* >                         1 + 5*N + 2*N**2. */
 /* > */
 /* >          If LRWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */
 /* >          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LIWORK */
 /* > \verbatim */
 /* >          LIWORK is INTEGER */
 /* >          The dimension of the array IWORK. */
 /* >          If N <= 1,                LIWORK must be at least 1. */
 /* >          If JOBZ  = 'N' and N > 1, LIWORK must be at least 1. */
 /* >          If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N. */
 /* > */
 /* >          If LIWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i and JOBZ = 'N', then the algorithm failed */
 /* >                to converge; i off-diagonal elements of an intermediate */
 /* >                tridiagonal form did not converge to zero; */
 /* >                if INFO = i and JOBZ = 'V', then the algorithm failed */
 /* >                to compute an eigenvalue while working on the submatrix */
 /* >                lying in rows and columns INFO/(N+1) through */
 /* >                mod(INFO,N+1). */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complex16HEeigen */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* >  Modified description of INFO. Sven, 16 Feb 05. */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* > Jeff Rutter, Computer Science Division, University of California */
 /* > at Berkeley, USA */
 /* > */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  All details about the 2stage techniques are available in: */
 /* > */
 /* >  Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */
 /* >  Parallel reduction to condensed forms for symmetric eigenvalue problems */
 /* >  using aggregated fine-grained and memory-aware kernels. In Proceedings */
 /* >  of 2011 International Conference for High Performance Computing, */
 /* >  Networking, Storage and Analysis (SC '11), New York, NY, USA, */
 /* >  Article 8 , 11 pages. */
 /* >  http://doi.acm.org/10.1145/2063384.2063394 */
 /* > */
 /* >  A. Haidar, J. Kurzak, P. Luszczek, 2013. */
 /* >  An improved parallel singular value algorithm and its implementation */
 /* >  for multicore hardware, In Proceedings of 2013 International Conference */
 /* >  for High Performance Computing, Networking, Storage and Analysis (SC '13). */
 /* >  Denver, Colorado, USA, 2013. */
 /* >  Article 90, 12 pages. */
 /* >  http://doi.acm.org/10.1145/2503210.2503292 */
 /* > */
 /* >  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */
 /* >  A novel hybrid CPU-GPU generalized eigensolver for electronic structure */
 /* >  calculations based on fine-grained memory aware tasks. */
 /* >  International Journal of High Performance Computing Applications. */
 /* >  Volume 28 Issue 2, Pages 196-209, May 2014. */
 /* >  http://hpc.sagepub.com/content/28/2/196 */
 /* > */
 /* > \endverbatim */

 /*  ===================================================================== */
 /* Subroutine */ int zheevd_2stage_(char *jobz, char *uplo, integer *n, 
 	doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work, 
 	integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, 
 	integer *liwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1;
    doublereal d__1;

    /* Local variables */
    integer inde;
    extern integer ilaenv2stage_(integer *, char *, char *, integer *, 
 	    integer *, integer *, integer *);
    doublereal anrm;
    integer imax;
    doublereal rmin, rmax;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
 	    integer *);
    doublereal sigma;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int zhetrd_2stage_(char *, char *, integer *, 
 	    doublecomplex *, integer *, doublereal *, doublereal *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, integer *);
    integer iinfo, lhtrd, lwmin;
    logical lower;
    integer llrwk, lwtrd;
    logical wantz;
    integer indwk2, ib, llwrk2, kd;
    extern doublereal dlamch_(char *);
    integer iscale;
    doublereal safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    doublereal bignum;
    extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, 
 	    integer *, doublereal *);
    integer indtau;
    extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *,
 	     integer *), zlascl_(char *, integer *, integer *, doublereal *, 
 	    doublereal *, integer *, integer *, doublecomplex *, integer *, 
 	    integer *), zstedc_(char *, integer *, doublereal *, 
 	    doublereal *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, doublereal *, integer *, integer *, integer *, integer 
 	    *);
    integer indrwk, indwrk, liwmin;
    extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *);
    integer lrwmin, llwork;
    doublereal smlnum;
    logical lquery;
    extern /* Subroutine */ int zunmtr_(char *, char *, char *, integer *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, integer *);
    doublereal eps;
    integer indhous;



 /*  -- LAPACK driver routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --w;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    lower = lsame_(uplo, "L");
    lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1;

    *info = 0;
    if (! lsame_(jobz, "N")) {
 	*info = -1;
    } else if (! (lower || lsame_(uplo, "U"))) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    }

    if (*info == 0) {
 	if (*n <= 1) {
 	    lwmin = 1;
 	    lrwmin = 1;
 	    liwmin = 1;
 	} else {
 	    kd = ilaenv2stage_(&c__1, "ZHETRD_2STAGE", jobz, n, &c_n1, &c_n1, 
 		    &c_n1);
 	    ib = ilaenv2stage_(&c__2, "ZHETRD_2STAGE", jobz, n, &kd, &c_n1, &
 		    c_n1);
 	    lhtrd = ilaenv2stage_(&c__3, "ZHETRD_2STAGE", jobz, n, &kd, &ib, &
 		    c_n1);
 	    lwtrd = ilaenv2stage_(&c__4, "ZHETRD_2STAGE", jobz, n, &kd, &ib, &
 		    c_n1);
 	    if (wantz) {
 		lwmin = (*n << 1) + *n * *n;
 /* Computing 2nd power */
 		i__1 = *n;
 		lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1);
 		liwmin = *n * 5 + 3;
 	    } else {
 		lwmin = *n + 1 + lhtrd + lwtrd;
 		lrwmin = *n;
 		liwmin = 1;
 	    }
 	}
 	work[1].r = (doublereal) lwmin, work[1].i = 0.;
 	rwork[1] = (doublereal) lrwmin;
 	iwork[1] = liwmin;

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -8;
 	} else if (*lrwork < lrwmin && ! lquery) {
 	    *info = -10;
 	} else if (*liwork < liwmin && ! lquery) {
 	    *info = -12;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHEEVD_2STAGE", &i__1, (ftnlen)13);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

    if (*n == 1) {
 	i__1 = a_dim1 + 1;
 	w[1] = a[i__1].r;
 	if (wantz) {
 	    i__1 = a_dim1 + 1;
 	    a[i__1].r = 1., a[i__1].i = 0.;
 	}
 	return 0;
    }

 /*     Get machine constants. */

    safmin = dlamch_("Safe minimum");
    eps = dlamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1. / smlnum;
    rmin = sqrt(smlnum);
    rmax = sqrt(bignum);

 /*     Scale matrix to allowable range, if necessary. */

    anrm = zlanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]);
    iscale = 0;
    if (anrm > 0. && anrm < rmin) {
 	iscale = 1;
 	sigma = rmin / anrm;
    } else if (anrm > rmax) {
 	iscale = 1;
 	sigma = rmax / anrm;
    }
    if (iscale == 1) {
 	zlascl_(uplo, &c__0, &c__0, &c_b28, &sigma, n, n, &a[a_offset], lda, 
 		info);
    }

 /*     Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. */

    inde = 1;
    indrwk = inde + *n;
    llrwk = *lrwork - indrwk + 1;
    indtau = 1;
    indhous = indtau + *n;
    indwrk = indhous + lhtrd;
    llwork = *lwork - indwrk + 1;
    indwk2 = indwrk + *n * *n;
    llwrk2 = *lwork - indwk2 + 1;

    zhetrd_2stage_(jobz, uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &
 	    work[indtau], &work[indhous], &lhtrd, &work[indwrk], &llwork, &
 	    iinfo);

 /*     For eigenvalues only, call DSTERF.  For eigenvectors, first call */
 /*     ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the */
 /*     tridiagonal matrix, then call ZUNMTR to multiply it to the */
 /*     Householder transformations represented as Householder vectors in */
 /*     A. */

    if (! wantz) {
 	dsterf_(n, &w[1], &rwork[inde], info);
    } else {
 	zstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], 
 		&llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info);
 	zunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[
 		indwrk], n, &work[indwk2], &llwrk2, &iinfo);
 	zlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda);
    }

 /*     If matrix was scaled, then rescale eigenvalues appropriately. */

    if (iscale == 1) {
 	if (*info == 0) {
 	    imax = *n;
 	} else {
 	    imax = *info - 1;
 	}
 	d__1 = 1. / sigma;
 	dscal_(&imax, &d__1, &w[1], &c__1);
    }

    work[1].r = (doublereal) lwmin, work[1].i = 0.;
    rwork[1] = (doublereal) lrwmin;
    iwork[1] = liwmin;

    return 0;

 /*     End of ZHEEVD_2STAGE */

 } /* zheevd_2stage__ */

--- a/lapack-netlib/SRC/zheevr.c
+++ b/lapack-netlib/SRC/zheevr.c
--- a/lapack-netlib/SRC/zheevr_2stage.c
+++ b/lapack-netlib/SRC/zheevr_2stage.c
--- a/lapack-netlib/SRC/zheevx.c
+++ b/lapack-netlib/SRC/zheevx.c
--- a/lapack-netlib/SRC/zheevx_2stage.c
+++ b/lapack-netlib/SRC/zheevx_2stage.c
--- a/lapack-netlib/SRC/zhegs2.c
+++ b/lapack-netlib/SRC/zhegs2.c
@@ -0,0 +1,770 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};
 static integer c__1 = 1;

 /* > \brief \b ZHEGS2 reduces a Hermitian definite generalized eigenproblem to standard form, using the factor
 ization results obtained from cpotrf (unblocked algorithm). */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHEGS2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhegs2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhegs2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhegs2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, ITYPE, LDA, LDB, N */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHEGS2 reduces a complex Hermitian-definite generalized */
 /* > eigenproblem to standard form. */
 /* > */
 /* > If ITYPE = 1, the problem is A*x = lambda*B*x, */
 /* > and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) */
 /* > */
 /* > If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */
 /* > B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H *A*L. */
 /* > */
 /* > B must have been previously factorized as U**H *U or L*L**H by ZPOTRF. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] ITYPE */
 /* > \verbatim */
 /* >          ITYPE is INTEGER */
 /* >          = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); */
 /* >          = 2 or 3: compute U*A*U**H or L**H *A*L. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          Specifies whether the upper or lower triangular part of the */
 /* >          Hermitian matrix A is stored, and how B has been factorized. */
 /* >          = 'U':  Upper triangular */
 /* >          = 'L':  Lower triangular */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          n by n upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading n by n lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* > */
 /* >          On exit, if INFO = 0, the transformed matrix, stored in the */
 /* >          same format as A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,N) */
 /* >          The triangular factor from the Cholesky factorization of B, */
 /* >          as returned by ZPOTRF. */
 /* >          B is modified by the routine but restored on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16HEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int zhegs2_(integer *itype, char *uplo, integer *n, 
 	doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, 
 	integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
    doublereal d__1, d__2;
    doublecomplex z__1;

    /* Local variables */
    extern /* Subroutine */ int zher2_(char *, integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *);
    integer k;
    extern logical lsame_(char *, char *);
    logical upper;
    extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *), ztrmv_(
 	    char *, char *, char *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *), ztrsv_(char *
 	    , char *, char *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *);
    doublecomplex ct;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zdscal_(
 	    integer *, doublereal *, doublecomplex *, integer *), zlacgv_(
 	    integer *, doublecomplex *, integer *);
    doublereal akk, bkk;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (*itype < 1 || *itype > 3) {
 	*info = -1;
    } else if (! upper && ! lsame_(uplo, "L")) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHEGS2", &i__1, (ftnlen)6);
 	return 0;
    }

    if (*itype == 1) {
 	if (upper) {

 /*           Compute inv(U**H)*A*inv(U) */

 	    i__1 = *n;
 	    for (k = 1; k <= i__1; ++k) {

 /*              Update the upper triangle of A(k:n,k:n) */

 		i__2 = k + k * a_dim1;
 		akk = a[i__2].r;
 		i__2 = k + k * b_dim1;
 		bkk = b[i__2].r;
 /* Computing 2nd power */
 		d__1 = bkk;
 		akk /= d__1 * d__1;
 		i__2 = k + k * a_dim1;
 		a[i__2].r = akk, a[i__2].i = 0.;
 		if (k < *n) {
 		    i__2 = *n - k;
 		    d__1 = 1. / bkk;
 		    zdscal_(&i__2, &d__1, &a[k + (k + 1) * a_dim1], lda);
 		    d__1 = akk * -.5;
 		    ct.r = d__1, ct.i = 0.;
 		    i__2 = *n - k;
 		    zlacgv_(&i__2, &a[k + (k + 1) * a_dim1], lda);
 		    i__2 = *n - k;
 		    zlacgv_(&i__2, &b[k + (k + 1) * b_dim1], ldb);
 		    i__2 = *n - k;
 		    zaxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + (
 			    k + 1) * a_dim1], lda);
 		    i__2 = *n - k;
 		    z__1.r = -1., z__1.i = 0.;
 		    zher2_(uplo, &i__2, &z__1, &a[k + (k + 1) * a_dim1], lda, 
 			    &b[k + (k + 1) * b_dim1], ldb, &a[k + 1 + (k + 1) 
 			    * a_dim1], lda);
 		    i__2 = *n - k;
 		    zaxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + (
 			    k + 1) * a_dim1], lda);
 		    i__2 = *n - k;
 		    zlacgv_(&i__2, &b[k + (k + 1) * b_dim1], ldb);
 		    i__2 = *n - k;
 		    ztrsv_(uplo, "Conjugate transpose", "Non-unit", &i__2, &b[
 			    k + 1 + (k + 1) * b_dim1], ldb, &a[k + (k + 1) * 
 			    a_dim1], lda);
 		    i__2 = *n - k;
 		    zlacgv_(&i__2, &a[k + (k + 1) * a_dim1], lda);
 		}
 /* L10: */
 	    }
 	} else {

 /*           Compute inv(L)*A*inv(L**H) */

 	    i__1 = *n;
 	    for (k = 1; k <= i__1; ++k) {

 /*              Update the lower triangle of A(k:n,k:n) */

 		i__2 = k + k * a_dim1;
 		akk = a[i__2].r;
 		i__2 = k + k * b_dim1;
 		bkk = b[i__2].r;
 /* Computing 2nd power */
 		d__1 = bkk;
 		akk /= d__1 * d__1;
 		i__2 = k + k * a_dim1;
 		a[i__2].r = akk, a[i__2].i = 0.;
 		if (k < *n) {
 		    i__2 = *n - k;
 		    d__1 = 1. / bkk;
 		    zdscal_(&i__2, &d__1, &a[k + 1 + k * a_dim1], &c__1);
 		    d__1 = akk * -.5;
 		    ct.r = d__1, ct.i = 0.;
 		    i__2 = *n - k;
 		    zaxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + 
 			    1 + k * a_dim1], &c__1);
 		    i__2 = *n - k;
 		    z__1.r = -1., z__1.i = 0.;
 		    zher2_(uplo, &i__2, &z__1, &a[k + 1 + k * a_dim1], &c__1, 
 			    &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + (k + 1) 
 			    * a_dim1], lda);
 		    i__2 = *n - k;
 		    zaxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + 
 			    1 + k * a_dim1], &c__1);
 		    i__2 = *n - k;
 		    ztrsv_(uplo, "No transpose", "Non-unit", &i__2, &b[k + 1 
 			    + (k + 1) * b_dim1], ldb, &a[k + 1 + k * a_dim1], 
 			    &c__1);
 		}
 /* L20: */
 	    }
 	}
    } else {
 	if (upper) {

 /*           Compute U*A*U**H */

 	    i__1 = *n;
 	    for (k = 1; k <= i__1; ++k) {

 /*              Update the upper triangle of A(1:k,1:k) */

 		i__2 = k + k * a_dim1;
 		akk = a[i__2].r;
 		i__2 = k + k * b_dim1;
 		bkk = b[i__2].r;
 		i__2 = k - 1;
 		ztrmv_(uplo, "No transpose", "Non-unit", &i__2, &b[b_offset], 
 			ldb, &a[k * a_dim1 + 1], &c__1);
 		d__1 = akk * .5;
 		ct.r = d__1, ct.i = 0.;
 		i__2 = k - 1;
 		zaxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + 
 			1], &c__1);
 		i__2 = k - 1;
 		zher2_(uplo, &i__2, &c_b1, &a[k * a_dim1 + 1], &c__1, &b[k * 
 			b_dim1 + 1], &c__1, &a[a_offset], lda);
 		i__2 = k - 1;
 		zaxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + 
 			1], &c__1);
 		i__2 = k - 1;
 		zdscal_(&i__2, &bkk, &a[k * a_dim1 + 1], &c__1);
 		i__2 = k + k * a_dim1;
 /* Computing 2nd power */
 		d__2 = bkk;
 		d__1 = akk * (d__2 * d__2);
 		a[i__2].r = d__1, a[i__2].i = 0.;
 /* L30: */
 	    }
 	} else {

 /*           Compute L**H *A*L */

 	    i__1 = *n;
 	    for (k = 1; k <= i__1; ++k) {

 /*              Update the lower triangle of A(1:k,1:k) */

 		i__2 = k + k * a_dim1;
 		akk = a[i__2].r;
 		i__2 = k + k * b_dim1;
 		bkk = b[i__2].r;
 		i__2 = k - 1;
 		zlacgv_(&i__2, &a[k + a_dim1], lda);
 		i__2 = k - 1;
 		ztrmv_(uplo, "Conjugate transpose", "Non-unit", &i__2, &b[
 			b_offset], ldb, &a[k + a_dim1], lda);
 		d__1 = akk * .5;
 		ct.r = d__1, ct.i = 0.;
 		i__2 = k - 1;
 		zlacgv_(&i__2, &b[k + b_dim1], ldb);
 		i__2 = k - 1;
 		zaxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda);
 		i__2 = k - 1;
 		zher2_(uplo, &i__2, &c_b1, &a[k + a_dim1], lda, &b[k + b_dim1]
 			, ldb, &a[a_offset], lda);
 		i__2 = k - 1;
 		zaxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda);
 		i__2 = k - 1;
 		zlacgv_(&i__2, &b[k + b_dim1], ldb);
 		i__2 = k - 1;
 		zdscal_(&i__2, &bkk, &a[k + a_dim1], lda);
 		i__2 = k - 1;
 		zlacgv_(&i__2, &a[k + a_dim1], lda);
 		i__2 = k + k * a_dim1;
 /* Computing 2nd power */
 		d__2 = bkk;
 		d__1 = akk * (d__2 * d__2);
 		a[i__2].r = d__1, a[i__2].i = 0.;
 /* L40: */
 	    }
 	}
    }
    return 0;

 /*     End of ZHEGS2 */

 } /* zhegs2_ */

--- a/lapack-netlib/SRC/zhegst.c
+++ b/lapack-netlib/SRC/zhegst.c
@@ -0,0 +1,787 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};
 static doublecomplex c_b2 = {.5,0.};
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static doublereal c_b18 = 1.;

 /* > \brief \b ZHEGST */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHEGST + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhegst.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhegst.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhegst.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, ITYPE, LDA, LDB, N */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHEGST reduces a complex Hermitian-definite generalized */
 /* > eigenproblem to standard form. */
 /* > */
 /* > If ITYPE = 1, the problem is A*x = lambda*B*x, */
 /* > and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) */
 /* > */
 /* > If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */
 /* > B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. */
 /* > */
 /* > B must have been previously factorized as U**H*U or L*L**H by ZPOTRF. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] ITYPE */
 /* > \verbatim */
 /* >          ITYPE is INTEGER */
 /* >          = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); */
 /* >          = 2 or 3: compute U*A*U**H or L**H*A*L. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored and B is factored as */
 /* >                  U**H*U; */
 /* >          = 'L':  Lower triangle of A is stored and B is factored as */
 /* >                  L*L**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          N-by-N upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading N-by-N lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* > */
 /* >          On exit, if INFO = 0, the transformed matrix, stored in the */
 /* >          same format as A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,N) */
 /* >          The triangular factor from the Cholesky factorization of B, */
 /* >          as returned by ZPOTRF. */
 /* >          B is modified by the routine but restored on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16HEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int zhegst_(integer *itype, char *uplo, integer *n, 
 	doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, 
 	integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
    doublecomplex z__1;

    /* Local variables */
    integer k;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int zhemm_(char *, char *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *);
    logical upper;
    extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, 
 	    integer *, integer *, doublecomplex *, doublecomplex *, integer *,
 	     doublecomplex *, integer *), 
 	    ztrsm_(char *, char *, char *, char *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *), zhegs2_(integer *, 
 	    char *, integer *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, integer *), zher2k_(char *, char *, integer *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublereal *, doublecomplex *, 
 	    integer *);
    integer kb, nb;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (*itype < 1 || *itype > 3) {
 	*info = -1;
    } else if (! upper && ! lsame_(uplo, "L")) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHEGST", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "ZHEGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);

    if (nb <= 1 || nb >= *n) {

 /*        Use unblocked code */

 	zhegs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    } else {

 /*        Use blocked code */

 	if (*itype == 1) {
 	    if (upper) {

 /*              Compute inv(U**H)*A*inv(U) */

 		i__1 = *n;
 		i__2 = nb;
 		for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
 /* Computing MIN */
 		    i__3 = *n - k + 1;
 		    kb = f2cmin(i__3,nb);

 /*                 Update the upper triangle of A(k:n,k:n) */

 		    zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + 
 			    k * b_dim1], ldb, info);
 		    if (k + kb <= *n) {
 			i__3 = *n - k - kb + 1;
 			ztrsm_("Left", uplo, "Conjugate transpose", "Non-unit"
 				, &kb, &i__3, &c_b1, &b[k + k * b_dim1], ldb, 
 				&a[k + (k + kb) * a_dim1], lda);
 			i__3 = *n - k - kb + 1;
 			z__1.r = -.5, z__1.i = 0.;
 			zhemm_("Left", uplo, &kb, &i__3, &z__1, &a[k + k * 
 				a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, 
 				&c_b1, &a[k + (k + kb) * a_dim1], lda);
 			i__3 = *n - k - kb + 1;
 			z__1.r = -1., z__1.i = 0.;
 			zher2k_(uplo, "Conjugate transpose", &i__3, &kb, &
 				z__1, &a[k + (k + kb) * a_dim1], lda, &b[k + (
 				k + kb) * b_dim1], ldb, &c_b18, &a[k + kb + (
 				k + kb) * a_dim1], lda)
 				;
 			i__3 = *n - k - kb + 1;
 			z__1.r = -.5, z__1.i = 0.;
 			zhemm_("Left", uplo, &kb, &i__3, &z__1, &a[k + k * 
 				a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, 
 				&c_b1, &a[k + (k + kb) * a_dim1], lda);
 			i__3 = *n - k - kb + 1;
 			ztrsm_("Right", uplo, "No transpose", "Non-unit", &kb,
 				 &i__3, &c_b1, &b[k + kb + (k + kb) * b_dim1],
 				 ldb, &a[k + (k + kb) * a_dim1], lda);
 		    }
 /* L10: */
 		}
 	    } else {

 /*              Compute inv(L)*A*inv(L**H) */

 		i__2 = *n;
 		i__1 = nb;
 		for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
 /* Computing MIN */
 		    i__3 = *n - k + 1;
 		    kb = f2cmin(i__3,nb);

 /*                 Update the lower triangle of A(k:n,k:n) */

 		    zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + 
 			    k * b_dim1], ldb, info);
 		    if (k + kb <= *n) {
 			i__3 = *n - k - kb + 1;
 			ztrsm_("Right", uplo, "Conjugate transpose", "Non-un"
 				"it", &i__3, &kb, &c_b1, &b[k + k * b_dim1], 
 				ldb, &a[k + kb + k * a_dim1], lda);
 			i__3 = *n - k - kb + 1;
 			z__1.r = -.5, z__1.i = 0.;
 			zhemm_("Right", uplo, &i__3, &kb, &z__1, &a[k + k * 
 				a_dim1], lda, &b[k + kb + k * b_dim1], ldb, &
 				c_b1, &a[k + kb + k * a_dim1], lda);
 			i__3 = *n - k - kb + 1;
 			z__1.r = -1., z__1.i = 0.;
 			zher2k_(uplo, "No transpose", &i__3, &kb, &z__1, &a[k 
 				+ kb + k * a_dim1], lda, &b[k + kb + k * 
 				b_dim1], ldb, &c_b18, &a[k + kb + (k + kb) * 
 				a_dim1], lda);
 			i__3 = *n - k - kb + 1;
 			z__1.r = -.5, z__1.i = 0.;
 			zhemm_("Right", uplo, &i__3, &kb, &z__1, &a[k + k * 
 				a_dim1], lda, &b[k + kb + k * b_dim1], ldb, &
 				c_b1, &a[k + kb + k * a_dim1], lda);
 			i__3 = *n - k - kb + 1;
 			ztrsm_("Left", uplo, "No transpose", "Non-unit", &
 				i__3, &kb, &c_b1, &b[k + kb + (k + kb) * 
 				b_dim1], ldb, &a[k + kb + k * a_dim1], lda);
 		    }
 /* L20: */
 		}
 	    }
 	} else {
 	    if (upper) {

 /*              Compute U*A*U**H */

 		i__1 = *n;
 		i__2 = nb;
 		for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
 /* Computing MIN */
 		    i__3 = *n - k + 1;
 		    kb = f2cmin(i__3,nb);

 /*                 Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */

 		    i__3 = k - 1;
 		    ztrmm_("Left", uplo, "No transpose", "Non-unit", &i__3, &
 			    kb, &c_b1, &b[b_offset], ldb, &a[k * a_dim1 + 1], 
 			    lda);
 		    i__3 = k - 1;
 		    zhemm_("Right", uplo, &i__3, &kb, &c_b2, &a[k + k * 
 			    a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b1, &a[
 			    k * a_dim1 + 1], lda);
 		    i__3 = k - 1;
 		    zher2k_(uplo, "No transpose", &i__3, &kb, &c_b1, &a[k * 
 			    a_dim1 + 1], lda, &b[k * b_dim1 + 1], ldb, &c_b18,
 			     &a[a_offset], lda);
 		    i__3 = k - 1;
 		    zhemm_("Right", uplo, &i__3, &kb, &c_b2, &a[k + k * 
 			    a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b1, &a[
 			    k * a_dim1 + 1], lda);
 		    i__3 = k - 1;
 		    ztrmm_("Right", uplo, "Conjugate transpose", "Non-unit", &
 			    i__3, &kb, &c_b1, &b[k + k * b_dim1], ldb, &a[k * 
 			    a_dim1 + 1], lda);
 		    zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + 
 			    k * b_dim1], ldb, info);
 /* L30: */
 		}
 	    } else {

 /*              Compute L**H*A*L */

 		i__2 = *n;
 		i__1 = nb;
 		for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
 /* Computing MIN */
 		    i__3 = *n - k + 1;
 		    kb = f2cmin(i__3,nb);

 /*                 Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */

 		    i__3 = k - 1;
 		    ztrmm_("Right", uplo, "No transpose", "Non-unit", &kb, &
 			    i__3, &c_b1, &b[b_offset], ldb, &a[k + a_dim1], 
 			    lda);
 		    i__3 = k - 1;
 		    zhemm_("Left", uplo, &kb, &i__3, &c_b2, &a[k + k * a_dim1]
 			    , lda, &b[k + b_dim1], ldb, &c_b1, &a[k + a_dim1],
 			     lda);
 		    i__3 = k - 1;
 		    zher2k_(uplo, "Conjugate transpose", &i__3, &kb, &c_b1, &
 			    a[k + a_dim1], lda, &b[k + b_dim1], ldb, &c_b18, &
 			    a[a_offset], lda);
 		    i__3 = k - 1;
 		    zhemm_("Left", uplo, &kb, &i__3, &c_b2, &a[k + k * a_dim1]
 			    , lda, &b[k + b_dim1], ldb, &c_b1, &a[k + a_dim1],
 			     lda);
 		    i__3 = k - 1;
 		    ztrmm_("Left", uplo, "Conjugate transpose", "Non-unit", &
 			    kb, &i__3, &c_b1, &b[k + k * b_dim1], ldb, &a[k + 
 			    a_dim1], lda);
 		    zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + 
 			    k * b_dim1], ldb, info);
 /* L40: */
 		}
 	    }
 	}
    }
    return 0;

 /*     End of ZHEGST */

 } /* zhegst_ */

--- a/lapack-netlib/SRC/zhegv.c
+++ b/lapack-netlib/SRC/zhegv.c
@@ -0,0 +1,738 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};
 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b ZHEGV */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHEGV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhegv.f
 "> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhegv.f
 "> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhegv.f
 "> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHEGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, */
 /*                         LWORK, RWORK, INFO ) */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, ITYPE, LDA, LDB, LWORK, N */
 /*       DOUBLE PRECISION   RWORK( * ), W( * ) */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHEGV computes all the eigenvalues, and optionally, the eigenvectors */
 /* > of a complex generalized Hermitian-definite eigenproblem, of the form */
 /* > A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x. */
 /* > Here A and B are assumed to be Hermitian and B is also */
 /* > positive definite. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] ITYPE */
 /* > \verbatim */
 /* >          ITYPE is INTEGER */
 /* >          Specifies the problem type to be solved: */
 /* >          = 1:  A*x = (lambda)*B*x */
 /* >          = 2:  A*B*x = (lambda)*x */
 /* >          = 3:  B*A*x = (lambda)*x */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangles of A and B are stored; */
 /* >          = 'L':  Lower triangles of A and B are stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA, N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of A contains the */
 /* >          upper triangular part of the matrix A.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of A contains */
 /* >          the lower triangular part of the matrix A. */
 /* > */
 /* >          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
 /* >          matrix Z of eigenvectors.  The eigenvectors are normalized */
 /* >          as follows: */
 /* >          if ITYPE = 1 or 2, Z**H*B*Z = I; */
 /* >          if ITYPE = 3, Z**H*inv(B)*Z = I. */
 /* >          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */
 /* >          or the lower triangle (if UPLO='L') of A, including the */
 /* >          diagonal, is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB, N) */
 /* >          On entry, the Hermitian positive definite matrix B. */
 /* >          If UPLO = 'U', the leading N-by-N upper triangular part of B */
 /* >          contains the upper triangular part of the matrix B. */
 /* >          If UPLO = 'L', the leading N-by-N lower triangular part of B */
 /* >          contains the lower triangular part of the matrix B. */
 /* > */
 /* >          On exit, if INFO <= N, the part of B containing the matrix is */
 /* >          overwritten by the triangular factor U or L from the Cholesky */
 /* >          factorization B = U**H*U or B = L*L**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK.  LWORK >= f2cmax(1,2*N-1). */
 /* >          For optimal efficiency, LWORK >= (NB+1)*N, */
 /* >          where NB is the blocksize for ZHETRD returned by ILAENV. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, dimension (f2cmax(1, 3*N-2)) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  ZPOTRF or ZHEEV returned an error code: */
 /* >             <= N:  if INFO = i, ZHEEV failed to converge; */
 /* >                    i off-diagonal elements of an intermediate */
 /* >                    tridiagonal form did not converge to zero; */
 /* >             > N:   if INFO = N + i, for 1 <= i <= N, then the leading */
 /* >                    minor of order i of B is not positive definite. */
 /* >                    The factorization of B could not be completed and */
 /* >                    no eigenvalues or eigenvectors were computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16HEeigen */

 /*  ===================================================================== */
 /* Subroutine */ int zhegv_(integer *itype, char *jobz, char *uplo, integer *
 	n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, 
 	doublereal *w, doublecomplex *work, integer *lwork, doublereal *rwork,
 	 integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;

    /* Local variables */
    integer neig;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int zheev_(char *, char *, integer *, 
 	    doublecomplex *, integer *, doublereal *, doublecomplex *, 
 	    integer *, doublereal *, integer *);
    char trans[1];
    logical upper, wantz;
    extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, 
 	    integer *, integer *, doublecomplex *, doublecomplex *, integer *,
 	     doublecomplex *, integer *), 
 	    ztrsm_(char *, char *, char *, char *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *);
    integer nb;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int zhegst_(integer *, char *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, integer *);
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int zpotrf_(char *, integer *, doublecomplex *, 
 	    integer *, integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --w;
    --work;
    --rwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1;

    *info = 0;
    if (*itype < 1 || *itype > 3) {
 	*info = -1;
    } else if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -2;
    } else if (! (upper || lsame_(uplo, "L"))) {
 	*info = -3;
    } else if (*n < 0) {
 	*info = -4;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -6;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
    }

    if (*info == 0) {
 	nb = ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
 		 (ftnlen)1);
 /* Computing MAX */
 	i__1 = 1, i__2 = (nb + 1) * *n;
 	lwkopt = f2cmax(i__1,i__2);
 	work[1].r = (doublereal) lwkopt, work[1].i = 0.;

 /* Computing MAX */
 	i__1 = 1, i__2 = (*n << 1) - 1;
 	if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
 	    *info = -11;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHEGV ", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Form a Cholesky factorization of B. */

    zpotrf_(uplo, n, &b[b_offset], ldb, info);
    if (*info != 0) {
 	*info = *n + *info;
 	return 0;
    }

 /*     Transform problem to standard eigenvalue problem and solve. */

    zhegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    zheev_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &rwork[1]
 	    , info);

    if (wantz) {

 /*        Backtransform eigenvectors to the original problem. */

 	neig = *n;
 	if (*info > 0) {
 	    neig = *info - 1;
 	}
 	if (*itype == 1 || *itype == 2) {

 /*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */
 /*           backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y */

 	    if (upper) {
 		*(unsigned char *)trans = 'N';
 	    } else {
 		*(unsigned char *)trans = 'C';
 	    }

 	    ztrsm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b1, &b[
 		    b_offset], ldb, &a[a_offset], lda);

 	} else if (*itype == 3) {

 /*           For B*A*x=(lambda)*x; */
 /*           backtransform eigenvectors: x = L*y or U**H *y */

 	    if (upper) {
 		*(unsigned char *)trans = 'C';
 	    } else {
 		*(unsigned char *)trans = 'N';
 	    }

 	    ztrmm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b1, &b[
 		    b_offset], ldb, &a[a_offset], lda);
 	}
    }

    work[1].r = (doublereal) lwkopt, work[1].i = 0.;

    return 0;

 /*     End of ZHEGV */

 } /* zhegv_ */

--- a/lapack-netlib/SRC/zhegv_2stage.c
+++ b/lapack-netlib/SRC/zhegv_2stage.c
@@ -0,0 +1,796 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__2 = 2;
 static integer c__3 = 3;
 static integer c__4 = 4;

 /* > \brief \b ZHEGV_2STAGE */

 /*  @precisions fortran z -> c */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHEGV_2STAGE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhegv_2
 stage.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhegv_2
 stage.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhegv_2
 stage.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, */
 /*                                WORK, LWORK, RWORK, INFO ) */

 /*       IMPLICIT NONE */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, ITYPE, LDA, LDB, LWORK, N */
 /*       DOUBLE PRECISION   RWORK( * ), W( * ) */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHEGV_2STAGE computes all the eigenvalues, and optionally, the eigenvectors */
 /* > of a complex generalized Hermitian-definite eigenproblem, of the form */
 /* > A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x. */
 /* > Here A and B are assumed to be Hermitian and B is also */
 /* > positive definite. */
 /* > This routine use the 2stage technique for the reduction to tridiagonal */
 /* > which showed higher performance on recent architecture and for large */
 /* > sizes N>2000. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] ITYPE */
 /* > \verbatim */
 /* >          ITYPE is INTEGER */
 /* >          Specifies the problem type to be solved: */
 /* >          = 1:  A*x = (lambda)*B*x */
 /* >          = 2:  A*B*x = (lambda)*x */
 /* >          = 3:  B*A*x = (lambda)*x */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* >                  Not available in this release. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangles of A and B are stored; */
 /* >          = 'L':  Lower triangles of A and B are stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA, N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of A contains the */
 /* >          upper triangular part of the matrix A.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of A contains */
 /* >          the lower triangular part of the matrix A. */
 /* > */
 /* >          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
 /* >          matrix Z of eigenvectors.  The eigenvectors are normalized */
 /* >          as follows: */
 /* >          if ITYPE = 1 or 2, Z**H*B*Z = I; */
 /* >          if ITYPE = 3, Z**H*inv(B)*Z = I. */
 /* >          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */
 /* >          or the lower triangle (if UPLO='L') of A, including the */
 /* >          diagonal, is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB, N) */
 /* >          On entry, the Hermitian positive definite matrix B. */
 /* >          If UPLO = 'U', the leading N-by-N upper triangular part of B */
 /* >          contains the upper triangular part of the matrix B. */
 /* >          If UPLO = 'L', the leading N-by-N lower triangular part of B */
 /* >          contains the lower triangular part of the matrix B. */
 /* > */
 /* >          On exit, if INFO <= N, the part of B containing the matrix is */
 /* >          overwritten by the triangular factor U or L from the Cholesky */
 /* >          factorization B = U**H*U or B = L*L**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK. LWORK >= 1, when N <= 1; */
 /* >          otherwise */
 /* >          If JOBZ = 'N' and N > 1, LWORK must be queried. */
 /* >                                   LWORK = MAX(1, dimension) where */
 /* >                                   dimension = f2cmax(stage1,stage2) + (KD+1)*N + N */
 /* >                                             = N*KD + N*f2cmax(KD+1,FACTOPTNB) */
 /* >                                               + f2cmax(2*KD*KD, KD*NTHREADS) */
 /* >                                               + (KD+1)*N + N */
 /* >                                   where KD is the blocking size of the reduction, */
 /* >                                   FACTOPTNB is the blocking used by the QR or LQ */
 /* >                                   algorithm, usually FACTOPTNB=128 is a good choice */
 /* >                                   NTHREADS is the number of threads used when */
 /* >                                   openMP compilation is enabled, otherwise =1. */
 /* >          If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, dimension (f2cmax(1, 3*N-2)) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  ZPOTRF or ZHEEV returned an error code: */
 /* >             <= N:  if INFO = i, ZHEEV failed to converge; */
 /* >                    i off-diagonal elements of an intermediate */
 /* >                    tridiagonal form did not converge to zero; */
 /* >             > N:   if INFO = N + i, for 1 <= i <= N, then the leading */
 /* >                    minor of order i of B is not positive definite. */
 /* >                    The factorization of B could not be completed and */
 /* >                    no eigenvalues or eigenvectors were computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complex16HEeigen */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  All details about the 2stage techniques are available in: */
 /* > */
 /* >  Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */
 /* >  Parallel reduction to condensed forms for symmetric eigenvalue problems */
 /* >  using aggregated fine-grained and memory-aware kernels. In Proceedings */
 /* >  of 2011 International Conference for High Performance Computing, */
 /* >  Networking, Storage and Analysis (SC '11), New York, NY, USA, */
 /* >  Article 8 , 11 pages. */
 /* >  http://doi.acm.org/10.1145/2063384.2063394 */
 /* > */
 /* >  A. Haidar, J. Kurzak, P. Luszczek, 2013. */
 /* >  An improved parallel singular value algorithm and its implementation */
 /* >  for multicore hardware, In Proceedings of 2013 International Conference */
 /* >  for High Performance Computing, Networking, Storage and Analysis (SC '13). */
 /* >  Denver, Colorado, USA, 2013. */
 /* >  Article 90, 12 pages. */
 /* >  http://doi.acm.org/10.1145/2503210.2503292 */
 /* > */
 /* >  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */
 /* >  A novel hybrid CPU-GPU generalized eigensolver for electronic structure */
 /* >  calculations based on fine-grained memory aware tasks. */
 /* >  International Journal of High Performance Computing Applications. */
 /* >  Volume 28 Issue 2, Pages 196-209, May 2014. */
 /* >  http://hpc.sagepub.com/content/28/2/196 */
 /* > */
 /* > \endverbatim */

 /*  ===================================================================== */
 /* Subroutine */ int zhegv_2stage_(integer *itype, char *jobz, char *uplo, 
 	integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer 
 	*ldb, doublereal *w, doublecomplex *work, integer *lwork, doublereal *
 	rwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;

    /* Local variables */
    integer neig;
    extern integer ilaenv2stage_(integer *, char *, char *, integer *, 
 	    integer *, integer *, integer *);
    extern /* Subroutine */ int zheev_2stage_(char *, char *, integer *, 
 	    doublecomplex *, integer *, doublereal *, doublecomplex *, 
 	    integer *, doublereal *, integer *);
    extern logical lsame_(char *, char *);
    integer lhtrd, lwmin;
    char trans[1];
    logical upper;
    integer lwtrd;
    logical wantz;
    extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, 
 	    integer *, integer *, doublecomplex *, doublecomplex *, integer *,
 	     doublecomplex *, integer *), 
 	    ztrsm_(char *, char *, char *, char *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *);
    integer ib, kd;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zhegst_(
 	    integer *, char *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, integer *);
    logical lquery;
    extern /* Subroutine */ int zpotrf_(char *, integer *, doublecomplex *, 
 	    integer *, integer *);



 /*  -- LAPACK driver routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --w;
    --work;
    --rwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1;

    *info = 0;
    if (*itype < 1 || *itype > 3) {
 	*info = -1;
    } else if (! lsame_(jobz, "N")) {
 	*info = -2;
    } else if (! (upper || lsame_(uplo, "L"))) {
 	*info = -3;
    } else if (*n < 0) {
 	*info = -4;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -6;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
    }

    if (*info == 0) {
 	kd = ilaenv2stage_(&c__1, "ZHETRD_2STAGE", jobz, n, &c_n1, &c_n1, &
 		c_n1);
 	ib = ilaenv2stage_(&c__2, "ZHETRD_2STAGE", jobz, n, &kd, &c_n1, &c_n1);
 	lhtrd = ilaenv2stage_(&c__3, "ZHETRD_2STAGE", jobz, n, &kd, &ib, &
 		c_n1);
 	lwtrd = ilaenv2stage_(&c__4, "ZHETRD_2STAGE", jobz, n, &kd, &ib, &
 		c_n1);
 	lwmin = *n + lhtrd + lwtrd;
 	work[1].r = (doublereal) lwmin, work[1].i = 0.;

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -11;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHEGV_2STAGE ", &i__1, (ftnlen)13);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Form a Cholesky factorization of B. */

    zpotrf_(uplo, n, &b[b_offset], ldb, info);
    if (*info != 0) {
 	*info = *n + *info;
 	return 0;
    }

 /*     Transform problem to standard eigenvalue problem and solve. */

    zhegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    zheev_2stage_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &
 	    rwork[1], info);

    if (wantz) {

 /*        Backtransform eigenvectors to the original problem. */

 	neig = *n;
 	if (*info > 0) {
 	    neig = *info - 1;
 	}
 	if (*itype == 1 || *itype == 2) {

 /*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */
 /*           backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y */

 	    if (upper) {
 		*(unsigned char *)trans = 'N';
 	    } else {
 		*(unsigned char *)trans = 'C';
 	    }

 	    ztrsm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b1, &b[
 		    b_offset], ldb, &a[a_offset], lda);

 	} else if (*itype == 3) {

 /*           For B*A*x=(lambda)*x; */
 /*           backtransform eigenvectors: x = L*y or U**H *y */

 	    if (upper) {
 		*(unsigned char *)trans = 'C';
 	    } else {
 		*(unsigned char *)trans = 'N';
 	    }

 	    ztrmm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b1, &b[
 		    b_offset], ldb, &a[a_offset], lda);
 	}
    }

    work[1].r = (doublereal) lwmin, work[1].i = 0.;

    return 0;

 /*     End of ZHEGV_2STAGE */

 } /* zhegv_2stage__ */

--- a/lapack-netlib/SRC/zhegvd.c
+++ b/lapack-netlib/SRC/zhegvd.c
@@ -0,0 +1,830 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};

 /* > \brief \b ZHEGVD */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHEGVD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhegvd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhegvd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhegvd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, */
 /*                          LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) */

 /*       CHARACTER          JOBZ, UPLO */
 /*       INTEGER            INFO, ITYPE, LDA, LDB, LIWORK, LRWORK, LWORK, N */
 /*       INTEGER            IWORK( * ) */
 /*       DOUBLE PRECISION   RWORK( * ), W( * ) */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHEGVD computes all the eigenvalues, and optionally, the eigenvectors */
 /* > of a complex generalized Hermitian-definite eigenproblem, of the form */
 /* > A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and */
 /* > B are assumed to be Hermitian and B is also positive definite. */
 /* > If eigenvectors are desired, it uses a divide and conquer algorithm. */
 /* > */
 /* > The divide and conquer algorithm makes very mild assumptions about */
 /* > floating point arithmetic. It will work on machines with a guard */
 /* > digit in add/subtract, or on those binary machines without guard */
 /* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
 /* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */
 /* > without guard digits, but we know of none. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] ITYPE */
 /* > \verbatim */
 /* >          ITYPE is INTEGER */
 /* >          Specifies the problem type to be solved: */
 /* >          = 1:  A*x = (lambda)*B*x */
 /* >          = 2:  A*B*x = (lambda)*x */
 /* >          = 3:  B*A*x = (lambda)*x */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangles of A and B are stored; */
 /* >          = 'L':  Lower triangles of A and B are stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA, N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of A contains the */
 /* >          upper triangular part of the matrix A.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of A contains */
 /* >          the lower triangular part of the matrix A. */
 /* > */
 /* >          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
 /* >          matrix Z of eigenvectors.  The eigenvectors are normalized */
 /* >          as follows: */
 /* >          if ITYPE = 1 or 2, Z**H*B*Z = I; */
 /* >          if ITYPE = 3, Z**H*inv(B)*Z = I. */
 /* >          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */
 /* >          or the lower triangle (if UPLO='L') of A, including the */
 /* >          diagonal, is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB, N) */
 /* >          On entry, the Hermitian matrix B.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of B contains the */
 /* >          upper triangular part of the matrix B.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of B contains */
 /* >          the lower triangular part of the matrix B. */
 /* > */
 /* >          On exit, if INFO <= N, the part of B containing the matrix is */
 /* >          overwritten by the triangular factor U or L from the Cholesky */
 /* >          factorization B = U**H*U or B = L*L**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0, the eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK. */
 /* >          If N <= 1,                LWORK >= 1. */
 /* >          If JOBZ  = 'N' and N > 1, LWORK >= N + 1. */
 /* >          If JOBZ  = 'V' and N > 1, LWORK >= 2*N + N**2. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal sizes of the WORK, RWORK and */
 /* >          IWORK arrays, returns these values as the first entries of */
 /* >          the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) */
 /* >          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LRWORK */
 /* > \verbatim */
 /* >          LRWORK is INTEGER */
 /* >          The dimension of the array RWORK. */
 /* >          If N <= 1,                LRWORK >= 1. */
 /* >          If JOBZ  = 'N' and N > 1, LRWORK >= N. */
 /* >          If JOBZ  = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. */
 /* > */
 /* >          If LRWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */
 /* >          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LIWORK */
 /* > \verbatim */
 /* >          LIWORK is INTEGER */
 /* >          The dimension of the array IWORK. */
 /* >          If N <= 1,                LIWORK >= 1. */
 /* >          If JOBZ  = 'N' and N > 1, LIWORK >= 1. */
 /* >          If JOBZ  = 'V' and N > 1, LIWORK >= 3 + 5*N. */
 /* > */
 /* >          If LIWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal sizes of the WORK, RWORK */
 /* >          and IWORK arrays, returns these values as the first entries */
 /* >          of the WORK, RWORK and IWORK arrays, and no error message */
 /* >          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  ZPOTRF or ZHEEVD returned an error code: */
 /* >             <= N:  if INFO = i and JOBZ = 'N', then the algorithm */
 /* >                    failed to converge; i off-diagonal elements of an */
 /* >                    intermediate tridiagonal form did not converge to */
 /* >                    zero; */
 /* >                    if INFO = i and JOBZ = 'V', then the algorithm */
 /* >                    failed to compute an eigenvalue while working on */
 /* >                    the submatrix lying in rows and columns INFO/(N+1) */
 /* >                    through mod(INFO,N+1); */
 /* >             > N:   if INFO = N + i, for 1 <= i <= N, then the leading */
 /* >                    minor of order i of B is not positive definite. */
 /* >                    The factorization of B could not be completed and */
 /* >                    no eigenvalues or eigenvectors were computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16HEeigen */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  Modified so that no backsubstitution is performed if ZHEEVD fails to */
 /* >  converge (NEIG in old code could be greater than N causing out of */
 /* >  bounds reference to A - reported by Ralf Meyer).  Also corrected the */
 /* >  description of INFO and the test on ITYPE. Sven, 16 Feb 05. */
 /* > \endverbatim */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zhegvd_(integer *itype, char *jobz, char *uplo, integer *
 	n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, 
 	doublereal *w, doublecomplex *work, integer *lwork, doublereal *rwork,
 	 integer *lrwork, integer *iwork, integer *liwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
    doublereal d__1, d__2;

    /* Local variables */
    integer lopt;
    extern logical lsame_(char *, char *);
    integer lwmin;
    char trans[1];
    integer liopt;
    logical upper;
    integer lropt;
    logical wantz;
    extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, 
 	    integer *, integer *, doublecomplex *, doublecomplex *, integer *,
 	     doublecomplex *, integer *), 
 	    ztrsm_(char *, char *, char *, char *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *), xerbla_(char *, 
 	    integer *, ftnlen), zheevd_(char *, char *, integer *, 
 	    doublecomplex *, integer *, doublereal *, doublecomplex *, 
 	    integer *, doublereal *, integer *, integer *, integer *, integer 
 	    *);
    integer liwmin;
    extern /* Subroutine */ int zhegst_(integer *, char *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, integer *);
    integer lrwmin;
    logical lquery;
    extern /* Subroutine */ int zpotrf_(char *, integer *, doublecomplex *, 
 	    integer *, integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --w;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1;

    *info = 0;
    if (*n <= 1) {
 	lwmin = 1;
 	lrwmin = 1;
 	liwmin = 1;
    } else if (wantz) {
 	lwmin = (*n << 1) + *n * *n;
 	lrwmin = *n * 5 + 1 + (*n << 1) * *n;
 	liwmin = *n * 5 + 3;
    } else {
 	lwmin = *n + 1;
 	lrwmin = *n;
 	liwmin = 1;
    }
    lopt = lwmin;
    lropt = lrwmin;
    liopt = liwmin;
    if (*itype < 1 || *itype > 3) {
 	*info = -1;
    } else if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -2;
    } else if (! (upper || lsame_(uplo, "L"))) {
 	*info = -3;
    } else if (*n < 0) {
 	*info = -4;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -6;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
    }

    if (*info == 0) {
 	work[1].r = (doublereal) lopt, work[1].i = 0.;
 	rwork[1] = (doublereal) lropt;
 	iwork[1] = liopt;

 	if (*lwork < lwmin && ! lquery) {
 	    *info = -11;
 	} else if (*lrwork < lrwmin && ! lquery) {
 	    *info = -13;
 	} else if (*liwork < liwmin && ! lquery) {
 	    *info = -15;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHEGVD", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Form a Cholesky factorization of B. */

    zpotrf_(uplo, n, &b[b_offset], ldb, info);
    if (*info != 0) {
 	*info = *n + *info;
 	return 0;
    }

 /*     Transform problem to standard eigenvalue problem and solve. */

    zhegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    zheevd_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &rwork[
 	    1], lrwork, &iwork[1], liwork, info);
 /* Computing MAX */
    d__1 = (doublereal) lopt, d__2 = work[1].r;
    lopt = (integer) f2cmax(d__1,d__2);
 /* Computing MAX */
    d__1 = (doublereal) lropt;
    lropt = (integer) f2cmax(d__1,rwork[1]);
 /* Computing MAX */
    d__1 = (doublereal) liopt, d__2 = (doublereal) iwork[1];
    liopt = (integer) f2cmax(d__1,d__2);

    if (wantz && *info == 0) {

 /*        Backtransform eigenvectors to the original problem. */

 	if (*itype == 1 || *itype == 2) {

 /*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */
 /*           backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y */

 	    if (upper) {
 		*(unsigned char *)trans = 'N';
 	    } else {
 		*(unsigned char *)trans = 'C';
 	    }

 	    ztrsm_("Left", uplo, trans, "Non-unit", n, n, &c_b1, &b[b_offset],
 		     ldb, &a[a_offset], lda);

 	} else if (*itype == 3) {

 /*           For B*A*x=(lambda)*x; */
 /*           backtransform eigenvectors: x = L*y or U**H *y */

 	    if (upper) {
 		*(unsigned char *)trans = 'C';
 	    } else {
 		*(unsigned char *)trans = 'N';
 	    }

 	    ztrmm_("Left", uplo, trans, "Non-unit", n, n, &c_b1, &b[b_offset],
 		     ldb, &a[a_offset], lda);
 	}
    }

    work[1].r = (doublereal) lopt, work[1].i = 0.;
    rwork[1] = (doublereal) lropt;
    iwork[1] = liopt;

    return 0;

 /*     End of ZHEGVD */

 } /* zhegvd_ */

--- a/lapack-netlib/SRC/zhegvx.c
+++ b/lapack-netlib/SRC/zhegvx.c
@@ -0,0 +1,896 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};
 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b ZHEGVX */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHEGVX + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhegvx.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhegvx.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhegvx.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHEGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, */
 /*                          VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, */
 /*                          LWORK, RWORK, IWORK, IFAIL, INFO ) */

 /*       CHARACTER          JOBZ, RANGE, UPLO */
 /*       INTEGER            IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N */
 /*       DOUBLE PRECISION   ABSTOL, VL, VU */
 /*       INTEGER            IFAIL( * ), IWORK( * ) */
 /*       DOUBLE PRECISION   RWORK( * ), W( * ) */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * ), */
 /*      $                   Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHEGVX computes selected eigenvalues, and optionally, eigenvectors */
 /* > of a complex generalized Hermitian-definite eigenproblem, of the form */
 /* > A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and */
 /* > B are assumed to be Hermitian and B is also positive definite. */
 /* > Eigenvalues and eigenvectors can be selected by specifying either a */
 /* > range of values or a range of indices for the desired eigenvalues. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] ITYPE */
 /* > \verbatim */
 /* >          ITYPE is INTEGER */
 /* >          Specifies the problem type to be solved: */
 /* >          = 1:  A*x = (lambda)*B*x */
 /* >          = 2:  A*B*x = (lambda)*x */
 /* >          = 3:  B*A*x = (lambda)*x */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JOBZ */
 /* > \verbatim */
 /* >          JOBZ is CHARACTER*1 */
 /* >          = 'N':  Compute eigenvalues only; */
 /* >          = 'V':  Compute eigenvalues and eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] RANGE */
 /* > \verbatim */
 /* >          RANGE is CHARACTER*1 */
 /* >          = 'A': all eigenvalues will be found. */
 /* >          = 'V': all eigenvalues in the half-open interval (VL,VU] */
 /* >                 will be found. */
 /* >          = 'I': the IL-th through IU-th eigenvalues will be found. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangles of A and B are stored; */
 /* >          = 'L':  Lower triangles of A and B are stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA, N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of A contains the */
 /* >          upper triangular part of the matrix A.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of A contains */
 /* >          the lower triangular part of the matrix A. */
 /* > */
 /* >          On exit,  the lower triangle (if UPLO='L') or the upper */
 /* >          triangle (if UPLO='U') of A, including the diagonal, is */
 /* >          destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB, N) */
 /* >          On entry, the Hermitian matrix B.  If UPLO = 'U', the */
 /* >          leading N-by-N upper triangular part of B contains the */
 /* >          upper triangular part of the matrix B.  If UPLO = 'L', */
 /* >          the leading N-by-N lower triangular part of B contains */
 /* >          the lower triangular part of the matrix B. */
 /* > */
 /* >          On exit, if INFO <= N, the part of B containing the matrix is */
 /* >          overwritten by the triangular factor U or L from the Cholesky */
 /* >          factorization B = U**H*U or B = L*L**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] VL */
 /* > \verbatim */
 /* >          VL is DOUBLE PRECISION */
 /* > */
 /* >          If RANGE='V', the lower bound of the interval to */
 /* >          be searched for eigenvalues. VL < VU. */
 /* >          Not referenced if RANGE = 'A' or 'I'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] VU */
 /* > \verbatim */
 /* >          VU is DOUBLE PRECISION */
 /* > */
 /* >          If RANGE='V', the upper bound of the interval to */
 /* >          be searched for eigenvalues. VL < VU. */
 /* >          Not referenced if RANGE = 'A' or 'I'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IL */
 /* > \verbatim */
 /* >          IL is INTEGER */
 /* > */
 /* >          If RANGE='I', the index of the */
 /* >          smallest eigenvalue to be returned. */
 /* >          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */
 /* >          Not referenced if RANGE = 'A' or 'V'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IU */
 /* > \verbatim */
 /* >          IU is INTEGER */
 /* > */
 /* >          If RANGE='I', the index of the */
 /* >          largest eigenvalue to be returned. */
 /* >          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */
 /* >          Not referenced if RANGE = 'A' or 'V'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ABSTOL */
 /* > \verbatim */
 /* >          ABSTOL is DOUBLE PRECISION */
 /* >          The absolute error tolerance for the eigenvalues. */
 /* >          An approximate eigenvalue is accepted as converged */
 /* >          when it is determined to lie in an interval [a,b] */
 /* >          of width less than or equal to */
 /* > */
 /* >                  ABSTOL + EPS *   f2cmax( |a|,|b| ) , */
 /* > */
 /* >          where EPS is the machine precision.  If ABSTOL is less than */
 /* >          or equal to zero, then  EPS*|T|  will be used in its place, */
 /* >          where |T| is the 1-norm of the tridiagonal matrix obtained */
 /* >          by reducing C to tridiagonal form, where C is the symmetric */
 /* >          matrix of the standard symmetric problem to which the */
 /* >          generalized problem is transformed. */
 /* > */
 /* >          Eigenvalues will be computed most accurately when ABSTOL is */
 /* >          set to twice the underflow threshold 2*DLAMCH('S'), not zero. */
 /* >          If this routine returns with INFO>0, indicating that some */
 /* >          eigenvectors did not converge, try setting ABSTOL to */
 /* >          2*DLAMCH('S'). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The total number of eigenvalues found.  0 <= M <= N. */
 /* >          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] W */
 /* > \verbatim */
 /* >          W is DOUBLE PRECISION array, dimension (N) */
 /* >          The first M elements contain the selected */
 /* >          eigenvalues in ascending order. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Z */
 /* > \verbatim */
 /* >          Z is COMPLEX*16 array, dimension (LDZ, f2cmax(1,M)) */
 /* >          If JOBZ = 'N', then Z is not referenced. */
 /* >          If JOBZ = 'V', then if INFO = 0, the first M columns of Z */
 /* >          contain the orthonormal eigenvectors of the matrix A */
 /* >          corresponding to the selected eigenvalues, with the i-th */
 /* >          column of Z holding the eigenvector associated with W(i). */
 /* >          The eigenvectors are normalized as follows: */
 /* >          if ITYPE = 1 or 2, Z**T*B*Z = I; */
 /* >          if ITYPE = 3, Z**T*inv(B)*Z = I. */
 /* > */
 /* >          If an eigenvector fails to converge, then that column of Z */
 /* >          contains the latest approximation to the eigenvector, and the */
 /* >          index of the eigenvector is returned in IFAIL. */
 /* >          Note: the user must ensure that at least f2cmax(1,M) columns are */
 /* >          supplied in the array Z; if RANGE = 'V', the exact value of M */
 /* >          is not known in advance and an upper bound must be used. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z.  LDZ >= 1, and if */
 /* >          JOBZ = 'V', LDZ >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK.  LWORK >= f2cmax(1,2*N). */
 /* >          For optimal efficiency, LWORK >= (NB+1)*N, */
 /* >          where NB is the blocksize for ZHETRD returned by ILAENV. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, dimension (7*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (5*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IFAIL */
 /* > \verbatim */
 /* >          IFAIL is INTEGER array, dimension (N) */
 /* >          If JOBZ = 'V', then if INFO = 0, the first M elements of */
 /* >          IFAIL are zero.  If INFO > 0, then IFAIL contains the */
 /* >          indices of the eigenvectors that failed to converge. */
 /* >          If JOBZ = 'N', then IFAIL is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  ZPOTRF or ZHEEVX returned an error code: */
 /* >             <= N:  if INFO = i, ZHEEVX failed to converge; */
 /* >                    i eigenvectors failed to converge.  Their indices */
 /* >                    are stored in array IFAIL. */
 /* >             > N:   if INFO = N + i, for 1 <= i <= N, then the leading */
 /* >                    minor of order i of B is not positive definite. */
 /* >                    The factorization of B could not be completed and */
 /* >                    no eigenvalues or eigenvectors were computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2016 */

 /* > \ingroup complex16HEeigen */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */

 /*  ===================================================================== */
 /* Subroutine */ int zhegvx_(integer *itype, char *jobz, char *range, char *
 	uplo, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, 
 	integer *ldb, doublereal *vl, doublereal *vu, integer *il, integer *
 	iu, doublereal *abstol, integer *m, doublereal *w, doublecomplex *z__,
 	 integer *ldz, doublecomplex *work, integer *lwork, doublereal *rwork,
 	 integer *iwork, integer *ifail, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, z_dim1, z_offset, i__1, i__2;

    /* Local variables */
    extern logical lsame_(char *, char *);
    char trans[1];
    logical upper, wantz;
    extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, 
 	    integer *, integer *, doublecomplex *, doublecomplex *, integer *,
 	     doublecomplex *, integer *), 
 	    ztrsm_(char *, char *, char *, char *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *);
    integer nb;
    logical alleig, indeig, valeig;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int zhegst_(integer *, char *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, integer *), zheevx_(char *, char *, char *, integer *, 
 	    doublecomplex *, integer *, doublereal *, doublereal *, integer *,
 	     integer *, doublereal *, integer *, doublereal *, doublecomplex *
 	    , integer *, doublecomplex *, integer *, doublereal *, integer *, 
 	    integer *, integer *);
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int zpotrf_(char *, integer *, doublecomplex *, 
 	    integer *, integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;
    --iwork;
    --ifail;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    alleig = lsame_(range, "A");
    valeig = lsame_(range, "V");
    indeig = lsame_(range, "I");
    lquery = *lwork == -1;

    *info = 0;
    if (*itype < 1 || *itype > 3) {
 	*info = -1;
    } else if (! (wantz || lsame_(jobz, "N"))) {
 	*info = -2;
    } else if (! (alleig || valeig || indeig)) {
 	*info = -3;
    } else if (! (upper || lsame_(uplo, "L"))) {
 	*info = -4;
    } else if (*n < 0) {
 	*info = -5;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -7;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -9;
    } else {
 	if (valeig) {
 	    if (*n > 0 && *vu <= *vl) {
 		*info = -11;
 	    }
 	} else if (indeig) {
 	    if (*il < 1 || *il > f2cmax(1,*n)) {
 		*info = -12;
 	    } else if (*iu < f2cmin(*n,*il) || *iu > *n) {
 		*info = -13;
 	    }
 	}
    }
    if (*info == 0) {
 	if (*ldz < 1 || wantz && *ldz < *n) {
 	    *info = -18;
 	}
    }

    if (*info == 0) {
 	nb = ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
 		 (ftnlen)1);
 /* Computing MAX */
 	i__1 = 1, i__2 = (nb + 1) * *n;
 	lwkopt = f2cmax(i__1,i__2);
 	work[1].r = (doublereal) lwkopt, work[1].i = 0.;

 /* Computing MAX */
 	i__1 = 1, i__2 = *n << 1;
 	if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
 	    *info = -20;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHEGVX", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    *m = 0;
    if (*n == 0) {
 	return 0;
    }

 /*     Form a Cholesky factorization of B. */

    zpotrf_(uplo, n, &b[b_offset], ldb, info);
    if (*info != 0) {
 	*info = *n + *info;
 	return 0;
    }

 /*     Transform problem to standard eigenvalue problem and solve. */

    zhegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    zheevx_(jobz, range, uplo, n, &a[a_offset], lda, vl, vu, il, iu, abstol, 
 	    m, &w[1], &z__[z_offset], ldz, &work[1], lwork, &rwork[1], &iwork[
 	    1], &ifail[1], info);

    if (wantz) {

 /*        Backtransform eigenvectors to the original problem. */

 	if (*info > 0) {
 	    *m = *info - 1;
 	}
 	if (*itype == 1 || *itype == 2) {

 /*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */
 /*           backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y */

 	    if (upper) {
 		*(unsigned char *)trans = 'N';
 	    } else {
 		*(unsigned char *)trans = 'C';
 	    }

 	    ztrsm_("Left", uplo, trans, "Non-unit", n, m, &c_b1, &b[b_offset],
 		     ldb, &z__[z_offset], ldz);

 	} else if (*itype == 3) {

 /*           For B*A*x=(lambda)*x; */
 /*           backtransform eigenvectors: x = L*y or U**H *y */

 	    if (upper) {
 		*(unsigned char *)trans = 'C';
 	    } else {
 		*(unsigned char *)trans = 'N';
 	    }

 	    ztrmm_("Left", uplo, trans, "Non-unit", n, m, &c_b1, &b[b_offset],
 		     ldb, &z__[z_offset], ldz);
 	}
    }

 /*     Set WORK(1) to optimal complex workspace size. */

    work[1].r = (doublereal) lwkopt, work[1].i = 0.;

    return 0;

 /*     End of ZHEGVX */

 } /* zhegvx_ */

--- a/lapack-netlib/SRC/zherfs.c
+++ b/lapack-netlib/SRC/zherfs.c
@@ -0,0 +1,926 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {1.,0.};
 static integer c__1 = 1;

 /* > \brief \b ZHERFS */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHERFS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zherfs.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zherfs.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zherfs.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, */
 /*                          X, LDX, FERR, BERR, WORK, RWORK, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * ) */
 /*       COMPLEX*16         A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */
 /*      $                   WORK( * ), X( LDX, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHERFS improves the computed solution to a system of linear */
 /* > equations when the coefficient matrix is Hermitian indefinite, and */
 /* > provides error bounds and backward error estimates for the solution. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrices B and X.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          The Hermitian matrix A.  If UPLO = 'U', the leading N-by-N */
 /* >          upper triangular part of A contains the upper triangular part */
 /* >          of the matrix A, and the strictly lower triangular part of A */
 /* >          is not referenced.  If UPLO = 'L', the leading N-by-N lower */
 /* >          triangular part of A contains the lower triangular part of */
 /* >          the matrix A, and the strictly upper triangular part of A is */
 /* >          not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AF */
 /* > \verbatim */
 /* >          AF is COMPLEX*16 array, dimension (LDAF,N) */
 /* >          The factored form of the matrix A.  AF contains the block */
 /* >          diagonal matrix D and the multipliers used to obtain the */
 /* >          factor U or L from the factorization A = U*D*U**H or */
 /* >          A = L*D*L**H as computed by ZHETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAF */
 /* > \verbatim */
 /* >          LDAF is INTEGER */
 /* >          The leading dimension of the array AF.  LDAF >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          Details of the interchanges and the block structure of D */
 /* >          as determined by ZHETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,NRHS) */
 /* >          The right hand side matrix B. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] X */
 /* > \verbatim */
 /* >          X is COMPLEX*16 array, dimension (LDX,NRHS) */
 /* >          On entry, the solution matrix X, as computed by ZHETRS. */
 /* >          On exit, the improved solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDX */
 /* > \verbatim */
 /* >          LDX is INTEGER */
 /* >          The leading dimension of the array X.  LDX >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] FERR */
 /* > \verbatim */
 /* >          FERR is DOUBLE PRECISION array, dimension (NRHS) */
 /* >          The estimated forward error bound for each solution vector */
 /* >          X(j) (the j-th column of the solution matrix X). */
 /* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
 /* >          is an estimated upper bound for the magnitude of the largest */
 /* >          element in (X(j) - XTRUE) divided by the magnitude of the */
 /* >          largest element in X(j).  The estimate is as reliable as */
 /* >          the estimate for RCOND, and is almost always a slight */
 /* >          overestimate of the true error. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] BERR */
 /* > \verbatim */
 /* >          BERR is DOUBLE PRECISION array, dimension (NRHS) */
 /* >          The componentwise relative backward error of each solution */
 /* >          vector X(j) (i.e., the smallest relative change in */
 /* >          any element of A or B that makes X(j) an exact solution). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /* > \par Internal Parameters: */
 /*  ========================= */
 /* > */
 /* > \verbatim */
 /* >  ITMAX is the maximum number of steps of iterative refinement. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16HEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int zherfs_(char *uplo, integer *n, integer *nrhs, 
 	doublecomplex *a, integer *lda, doublecomplex *af, integer *ldaf, 
 	integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, 
 	integer *ldx, doublereal *ferr, doublereal *berr, doublecomplex *work,
 	 doublereal *rwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
 	    x_offset, i__1, i__2, i__3, i__4, i__5;
    doublereal d__1, d__2, d__3, d__4;
    doublecomplex z__1;

    /* Local variables */
    integer kase;
    doublereal safe1, safe2;
    integer i__, j, k;
    doublereal s;
    extern logical lsame_(char *, char *);
    integer isave[3], count;
    extern /* Subroutine */ int zhemv_(char *, integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *);
    logical upper;
    extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *), zaxpy_(integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *), zlacn2_(
 	    integer *, doublecomplex *, doublecomplex *, doublereal *, 
 	    integer *, integer *);
    extern doublereal dlamch_(char *);
    doublereal xk;
    integer nz;
    doublereal safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    doublereal lstres;
    extern /* Subroutine */ int zhetrs_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, integer *, doublecomplex *, integer *,
 	     integer *);
    doublereal eps;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    af_dim1 = *ldaf;
    af_offset = 1 + af_dim1 * 1;
    af -= af_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --rwork;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldaf < f2cmax(1,*n)) {
 	*info = -7;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -10;
    } else if (*ldx < f2cmax(1,*n)) {
 	*info = -12;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHERFS", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
 	i__1 = *nrhs;
 	for (j = 1; j <= i__1; ++j) {
 	    ferr[j] = 0.;
 	    berr[j] = 0.;
 /* L10: */
 	}
 	return 0;
    }

 /*     NZ = maximum number of nonzero elements in each row of A, plus 1 */

    nz = *n + 1;
    eps = dlamch_("Epsilon");
    safmin = dlamch_("Safe minimum");
    safe1 = nz * safmin;
    safe2 = safe1 / eps;

 /*     Do for each right hand side */

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {

 	count = 1;
 	lstres = 3.;
 L20:

 /*        Loop until stopping criterion is satisfied. */

 /*        Compute residual R = B - A * X */

 	zcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[1], &c__1);
 	z__1.r = -1., z__1.i = 0.;
 	zhemv_(uplo, n, &z__1, &a[a_offset], lda, &x[j * x_dim1 + 1], &c__1, &
 		c_b1, &work[1], &c__1);

 /*        Compute componentwise relative backward error from formula */

 /*        f2cmax(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */

 /*        where abs(Z) is the componentwise absolute value of the matrix */
 /*        or vector Z.  If the i-th component of the denominator is less */
 /*        than SAFE2, then SAFE1 is added to the i-th components of the */
 /*        numerator and denominator before dividing. */

 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    i__3 = i__ + j * b_dim1;
 	    rwork[i__] = (d__1 = b[i__3].r, abs(d__1)) + (d__2 = d_imag(&b[
 		    i__ + j * b_dim1]), abs(d__2));
 /* L30: */
 	}

 /*        Compute abs(A)*abs(X) + abs(B). */

 	if (upper) {
 	    i__2 = *n;
 	    for (k = 1; k <= i__2; ++k) {
 		s = 0.;
 		i__3 = k + j * x_dim1;
 		xk = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[k + j *
 			 x_dim1]), abs(d__2));
 		i__3 = k - 1;
 		for (i__ = 1; i__ <= i__3; ++i__) {
 		    i__4 = i__ + k * a_dim1;
 		    rwork[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = 
 			    d_imag(&a[i__ + k * a_dim1]), abs(d__2))) * xk;
 		    i__4 = i__ + k * a_dim1;
 		    i__5 = i__ + j * x_dim1;
 		    s += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[
 			    i__ + k * a_dim1]), abs(d__2))) * ((d__3 = x[i__5]
 			    .r, abs(d__3)) + (d__4 = d_imag(&x[i__ + j * 
 			    x_dim1]), abs(d__4)));
 /* L40: */
 		}
 		i__3 = k + k * a_dim1;
 		rwork[k] = rwork[k] + (d__1 = a[i__3].r, abs(d__1)) * xk + s;
 /* L50: */
 	    }
 	} else {
 	    i__2 = *n;
 	    for (k = 1; k <= i__2; ++k) {
 		s = 0.;
 		i__3 = k + j * x_dim1;
 		xk = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[k + j *
 			 x_dim1]), abs(d__2));
 		i__3 = k + k * a_dim1;
 		rwork[k] += (d__1 = a[i__3].r, abs(d__1)) * xk;
 		i__3 = *n;
 		for (i__ = k + 1; i__ <= i__3; ++i__) {
 		    i__4 = i__ + k * a_dim1;
 		    rwork[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = 
 			    d_imag(&a[i__ + k * a_dim1]), abs(d__2))) * xk;
 		    i__4 = i__ + k * a_dim1;
 		    i__5 = i__ + j * x_dim1;
 		    s += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[
 			    i__ + k * a_dim1]), abs(d__2))) * ((d__3 = x[i__5]
 			    .r, abs(d__3)) + (d__4 = d_imag(&x[i__ + j * 
 			    x_dim1]), abs(d__4)));
 /* L60: */
 		}
 		rwork[k] += s;
 /* L70: */
 	    }
 	}
 	s = 0.;
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    if (rwork[i__] > safe2) {
 /* Computing MAX */
 		i__3 = i__;
 		d__3 = s, d__4 = ((d__1 = work[i__3].r, abs(d__1)) + (d__2 = 
 			d_imag(&work[i__]), abs(d__2))) / rwork[i__];
 		s = f2cmax(d__3,d__4);
 	    } else {
 /* Computing MAX */
 		i__3 = i__;
 		d__3 = s, d__4 = ((d__1 = work[i__3].r, abs(d__1)) + (d__2 = 
 			d_imag(&work[i__]), abs(d__2)) + safe1) / (rwork[i__] 
 			+ safe1);
 		s = f2cmax(d__3,d__4);
 	    }
 /* L80: */
 	}
 	berr[j] = s;

 /*        Test stopping criterion. Continue iterating if */
 /*           1) The residual BERR(J) is larger than machine epsilon, and */
 /*           2) BERR(J) decreased by at least a factor of 2 during the */
 /*              last iteration, and */
 /*           3) At most ITMAX iterations tried. */

 	if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) {

 /*           Update solution and try again. */

 	    zhetrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[1], 
 		    n, info);
 	    zaxpy_(n, &c_b1, &work[1], &c__1, &x[j * x_dim1 + 1], &c__1);
 	    lstres = berr[j];
 	    ++count;
 	    goto L20;
 	}

 /*        Bound error from formula */

 /*        norm(X - XTRUE) / norm(X) .le. FERR = */
 /*        norm( abs(inv(A))* */
 /*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */

 /*        where */
 /*          norm(Z) is the magnitude of the largest component of Z */
 /*          inv(A) is the inverse of A */
 /*          abs(Z) is the componentwise absolute value of the matrix or */
 /*             vector Z */
 /*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
 /*          EPS is machine epsilon */

 /*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */
 /*        is incremented by SAFE1 if the i-th component of */
 /*        abs(A)*abs(X) + abs(B) is less than SAFE2. */

 /*        Use ZLACN2 to estimate the infinity-norm of the matrix */
 /*           inv(A) * diag(W), */
 /*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */

 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    if (rwork[i__] > safe2) {
 		i__3 = i__;
 		rwork[i__] = (d__1 = work[i__3].r, abs(d__1)) + (d__2 = 
 			d_imag(&work[i__]), abs(d__2)) + nz * eps * rwork[i__]
 			;
 	    } else {
 		i__3 = i__;
 		rwork[i__] = (d__1 = work[i__3].r, abs(d__1)) + (d__2 = 
 			d_imag(&work[i__]), abs(d__2)) + nz * eps * rwork[i__]
 			 + safe1;
 	    }
 /* L90: */
 	}

 	kase = 0;
 L100:
 	zlacn2_(n, &work[*n + 1], &work[1], &ferr[j], &kase, isave);
 	if (kase != 0) {
 	    if (kase == 1) {

 /*              Multiply by diag(W)*inv(A**H). */

 		zhetrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[
 			1], n, info);
 		i__2 = *n;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    i__3 = i__;
 		    i__4 = i__;
 		    i__5 = i__;
 		    z__1.r = rwork[i__4] * work[i__5].r, z__1.i = rwork[i__4] 
 			    * work[i__5].i;
 		    work[i__3].r = z__1.r, work[i__3].i = z__1.i;
 /* L110: */
 		}
 	    } else if (kase == 2) {

 /*              Multiply by inv(A)*diag(W). */

 		i__2 = *n;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    i__3 = i__;
 		    i__4 = i__;
 		    i__5 = i__;
 		    z__1.r = rwork[i__4] * work[i__5].r, z__1.i = rwork[i__4] 
 			    * work[i__5].i;
 		    work[i__3].r = z__1.r, work[i__3].i = z__1.i;
 /* L120: */
 		}
 		zhetrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[
 			1], n, info);
 	    }
 	    goto L100;
 	}

 /*        Normalize error. */

 	lstres = 0.;
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 /* Computing MAX */
 	    i__3 = i__ + j * x_dim1;
 	    d__3 = lstres, d__4 = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = 
 		    d_imag(&x[i__ + j * x_dim1]), abs(d__2));
 	    lstres = f2cmax(d__3,d__4);
 /* L130: */
 	}
 	if (lstres != 0.) {
 	    ferr[j] /= lstres;
 	}

 /* L140: */
    }

    return 0;

 /*     End of ZHERFS */

 } /* zherfs_ */

--- a/lapack-netlib/SRC/zherfsx.c
+++ b/lapack-netlib/SRC/zherfsx.c
@@ -0,0 +1,381 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
--- a/lapack-netlib/SRC/zhesv.c
+++ b/lapack-netlib/SRC/zhesv.c
@@ -0,0 +1,675 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief <b> ZHESV computes the solution to system of linear equations A * X = B for HE matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHESV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhesv.f
 "> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhesv.f
 "> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhesv.f
 "> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHESV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, */
 /*                         LWORK, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, LDB, LWORK, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHESV computes the solution to a complex system of linear equations */
 /* >    A * X = B, */
 /* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */
 /* > matrices. */
 /* > */
 /* > The diagonal pivoting method is used to factor A as */
 /* >    A = U * D * U**H,  if UPLO = 'U', or */
 /* >    A = L * D * L**H,  if UPLO = 'L', */
 /* > where U (or L) is a product of permutation and unit upper (lower) */
 /* > triangular matrices, and D is Hermitian and block diagonal with */
 /* > 1-by-1 and 2-by-2 diagonal blocks.  The factored form of A is then */
 /* > used to solve the system of equations A * X = B. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of linear equations, i.e., the order of the */
 /* >          matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          N-by-N upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading N-by-N lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* > */
 /* >          On exit, if INFO = 0, the block diagonal matrix D and the */
 /* >          multipliers used to obtain the factor U or L from the */
 /* >          factorization A = U*D*U**H or A = L*D*L**H as computed by */
 /* >          ZHETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          Details of the interchanges and the block structure of D, as */
 /* >          determined by ZHETRF.  If IPIV(k) > 0, then rows and columns */
 /* >          k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 */
 /* >          diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, */
 /* >          then rows and columns k-1 and -IPIV(k) were interchanged and */
 /* >          D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and */
 /* >          IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and */
 /* >          -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 */
 /* >          diagonal block. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,NRHS) */
 /* >          On entry, the N-by-NRHS right hand side matrix B. */
 /* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of WORK.  LWORK >= 1, and for best performance */
 /* >          LWORK >= f2cmax(1,N*NB), where NB is the optimal blocksize for */
 /* >          ZHETRF. */
 /* >          for LWORK < N, TRS will be done with Level BLAS 2 */
 /* >          for LWORK >= N, TRS will be done with Level BLAS 3 */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization */
 /* >               has been completed, but the block diagonal matrix D is */
 /* >               exactly singular, so the solution could not be computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16HEsolve */

 /*  ===================================================================== */
 /* Subroutine */ int zhesv_(char *uplo, integer *n, integer *nrhs, 
 	doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, 
 	integer *ldb, doublecomplex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;

    /* Local variables */
    extern logical lsame_(char *, char *);
    integer nb;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int zhetrf_(char *, integer *, doublecomplex *, 
 	    integer *, integer *, doublecomplex *, integer *, integer *), zhetrs_(char *, integer *, integer *, doublecomplex *, 
 	    integer *, integer *, doublecomplex *, integer *, integer *);
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int zhetrs2_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, integer *, doublecomplex *, integer *,
 	     doublecomplex *, integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;

    /* Function Body */
    *info = 0;
    lquery = *lwork == -1;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
    } else if (*lwork < 1 && ! lquery) {
 	*info = -10;
    }

    if (*info == 0) {
 	if (*n == 0) {
 	    lwkopt = 1;
 	} else {
 	    nb = ilaenv_(&c__1, "ZHETRF", uplo, n, &c_n1, &c_n1, &c_n1, (
 		    ftnlen)6, (ftnlen)1);
 	    lwkopt = *n * nb;
 	}
 	work[1].r = (doublereal) lwkopt, work[1].i = 0.;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHESV ", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Compute the factorization A = U*D*U**H or A = L*D*L**H. */

    zhetrf_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info);
    if (*info == 0) {

 /*        Solve the system A*X = B, overwriting B with X. */

 	if (*lwork < *n) {

 /*        Solve with TRS ( Use Level BLAS 2) */

 	    zhetrs_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], 
 		    ldb, info);

 	} else {

 /*        Solve with TRS2 ( Use Level BLAS 3) */

 	    zhetrs2_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset],
 		     ldb, &work[1], info);

 	}

    }

    work[1].r = (doublereal) lwkopt, work[1].i = 0.;

    return 0;

 /*     End of ZHESV */

 } /* zhesv_ */

--- a/lapack-netlib/SRC/zhesv_aa.c
+++ b/lapack-netlib/SRC/zhesv_aa.c
@@ -0,0 +1,651 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c_n1 = -1;

 /* > \brief <b> ZHESV_AA computes the solution to system of linear equations A * X = B for HE matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHESV_AA + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhesv_a
 a.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhesv_a
 a.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhesv_a
 a.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHESV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, */
 /*                            LWORK, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, LDB, LWORK, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHESV_AA computes the solution to a complex system of linear equations */
 /* >    A * X = B, */
 /* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */
 /* > matrices. */
 /* > */
 /* > Aasen's algorithm is used to factor A as */
 /* >    A = U**H * T * U,  if UPLO = 'U', or */
 /* >    A = L * T * L**H,  if UPLO = 'L', */
 /* > where U (or L) is a product of permutation and unit upper (lower) */
 /* > triangular matrices, and T is Hermitian and tridiagonal. The factored form */
 /* > of A is then used to solve the system of equations A * X = B. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of linear equations, i.e., the order of the */
 /* >          matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          N-by-N upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading N-by-N lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* > */
 /* >          On exit, if INFO = 0, the tridiagonal matrix T and the */
 /* >          multipliers used to obtain the factor U or L from the */
 /* >          factorization A = U**H*T*U or A = L*T*L**H as computed by */
 /* >          ZHETRF_AA. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          On exit, it contains the details of the interchanges, i.e., */
 /* >          the row and column k of A were interchanged with the */
 /* >          row and column IPIV(k). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,NRHS) */
 /* >          On entry, the N-by-NRHS right hand side matrix B. */
 /* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of WORK.  LWORK >= MAX(1,2*N,3*N-2), and for best */
 /* >          performance LWORK >= f2cmax(1,N*NB), where NB is the optimal */
 /* >          blocksize for ZHETRF. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization */
 /* >               has been completed, but the block diagonal matrix D is */
 /* >               exactly singular, so the solution could not be computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complex16HEsolve */

 /*  ===================================================================== */
 /* Subroutine */ int zhesv_aa_(char *uplo, integer *n, integer *nrhs, 
 	doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, 
 	integer *ldb, doublecomplex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;

    /* Local variables */
    integer lwkopt_hetrf__, lwkopt_hetrs__;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int zhetrf_aa_(char *, integer *, doublecomplex *
 	    , integer *, integer *, doublecomplex *, integer *, integer *), zhetrs_aa_(char *, integer *, integer *, doublecomplex *
 	    , integer *, integer *, doublecomplex *, integer *, doublecomplex 
 	    *, integer *, integer *), xerbla_(char *, integer *, ftnlen);
    integer lwkopt;
    logical lquery;


 /*  -- LAPACK driver routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;

    /* Function Body */
    *info = 0;
    lquery = *lwork == -1;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = *n << 1, i__2 = *n * 3 - 2;
 	if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
 	    *info = -10;
 	}
    }

    if (*info == 0) {
 	zhetrf_aa_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], &c_n1, 
 		info);
 	lwkopt_hetrf__ = (integer) work[1].r;
 	zhetrs_aa_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], 
 		ldb, &work[1], &c_n1, info);
 	lwkopt_hetrs__ = (integer) work[1].r;
 	lwkopt = f2cmax(lwkopt_hetrf__,lwkopt_hetrs__);
 	work[1].r = (doublereal) lwkopt, work[1].i = 0.;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHESV_AA ", &i__1, (ftnlen)9);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Compute the factorization A = U**H*T*U or A = L*T*L**H. */

    zhetrf_aa_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info);
    if (*info == 0) {

 /*        Solve the system A*X = B, overwriting B with X. */

 	zhetrs_aa_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], 
 		ldb, &work[1], lwork, info);

    }

    work[1].r = (doublereal) lwkopt, work[1].i = 0.;

    return 0;

 /*     End of ZHESV_AA */

 } /* zhesv_aa__ */

--- a/lapack-netlib/SRC/zhesv_aa_2stage.c
+++ b/lapack-netlib/SRC/zhesv_aa_2stage.c
@@ -0,0 +1,679 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c_n1 = -1;

 /* > \brief <b> ZHESV_AA_2STAGE computes the solution to system of linear equations A * X = B for HE matrices
 </b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHESV_AA_2STAGE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhesv_a
 a_2stage.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhesv_a
 a_2stage.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhesv_a
 a_2stage.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*      SUBROUTINE ZHESV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, */
 /*                                  IPIV, IPIV2, B, LDB, WORK, LWORK, */
 /*                                  INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            N, NRHS, LDA, LTB, LDB, LWORK, INFO */
 /*       INTEGER            IPIV( * ), IPIV2( * ) */
 /*       COMPLEX*16         A( LDA, * ), TB( * ), B( LDB, *), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHESV_AA_2STAGE computes the solution to a complex system of */
 /* > linear equations */
 /* >    A * X = B, */
 /* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */
 /* > matrices. */
 /* > */
 /* > Aasen's 2-stage algorithm is used to factor A as */
 /* >    A = U**H * T * U,  if UPLO = 'U', or */
 /* >    A = L * T * L**H,  if UPLO = 'L', */
 /* > where U (or L) is a product of permutation and unit upper (lower) */
 /* > triangular matrices, and T is Hermitian and band. The matrix T is */
 /* > then LU-factored with partial pivoting. The factored form of A */
 /* > is then used to solve the system of equations A * X = B. */
 /* > */
 /* > This is the blocked version of the algorithm, calling Level 3 BLAS. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          N-by-N upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading N-by-N lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* > */
 /* >          On exit, L is stored below (or above) the subdiaonal blocks, */
 /* >          when UPLO  is 'L' (or 'U'). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TB */
 /* > \verbatim */
 /* >          TB is COMPLEX*16 array, dimension (LTB) */
 /* >          On exit, details of the LU factorization of the band matrix. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LTB */
 /* > \verbatim */
 /* >          LTB is INTEGER */
 /* >          The size of the array TB. LTB >= 4*N, internally */
 /* >          used to select NB such that LTB >= (3*NB+1)*N. */
 /* > */
 /* >          If LTB = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal size of LTB, */
 /* >          returns this value as the first entry of TB, and */
 /* >          no error message related to LTB is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          On exit, it contains the details of the interchanges, i.e., */
 /* >          the row and column k of A were interchanged with the */
 /* >          row and column IPIV(k). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV2 */
 /* > \verbatim */
 /* >          IPIV2 is INTEGER array, dimension (N) */
 /* >          On exit, it contains the details of the interchanges, i.e., */
 /* >          the row and column k of T were interchanged with the */
 /* >          row and column IPIV(k). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,NRHS) */
 /* >          On entry, the right hand side matrix B. */
 /* >          On exit, the solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 workspace of size LWORK */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The size of WORK. LWORK >= N, internally used to select NB */
 /* >          such that LWORK >= N*NB. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the */
 /* >          routine only calculates the optimal size of the WORK array, */
 /* >          returns this value as the first entry of the WORK array, and */
 /* >          no error message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          > 0:  if INFO = i, band LU factorization failed on i-th column */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complex16HEsolve */

 /*  ===================================================================== */
 /* Subroutine */ int zhesv_aa_2stage_(char *uplo, integer *n, integer *nrhs, 
 	doublecomplex *a, integer *lda, doublecomplex *tb, integer *ltb, 
 	integer *ipiv, integer *ipiv2, doublecomplex *b, integer *ldb, 
 	doublecomplex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;

    /* Local variables */
    extern /* Subroutine */ int zhetrf_aa_2stage_(char *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, integer *,
 	     integer *, doublecomplex *, integer *, integer *), 
 	    zhetrs_aa_2stage_(char *, integer *, integer *, doublecomplex *, 
 	    integer *, doublecomplex *, integer *, integer *, integer *, 
 	    doublecomplex *, integer *, integer *);
    extern logical lsame_(char *, char *);
    logical upper;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    integer lwkopt;
    logical tquery, wquery;


 /*  -- LAPACK driver routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */



 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tb;
    --ipiv;
    --ipiv2;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    wquery = *lwork == -1;
    tquery = *ltb == -1;
    if (! upper && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ltb < *n << 2 && ! tquery) {
 	*info = -7;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -11;
    } else if (*lwork < *n && ! wquery) {
 	*info = -13;
    }

    if (*info == 0) {
 	zhetrf_aa_2stage_(uplo, n, &a[a_offset], lda, &tb[1], &c_n1, &ipiv[1]
 		, &ipiv2[1], &work[1], &c_n1, info);
 	lwkopt = (integer) work[1].r;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHESV_AA_2STAGE", &i__1, (ftnlen)15);
 	return 0;
    } else if (wquery || tquery) {
 	return 0;
    }

 /*     Compute the factorization A = U**H*T*U or A = L*T*L**H. */

    zhetrf_aa_2stage_(uplo, n, &a[a_offset], lda, &tb[1], ltb, &ipiv[1], &
 	    ipiv2[1], &work[1], lwork, info);
    if (*info == 0) {

 /*        Solve the system A*X = B, overwriting B with X. */

 	zhetrs_aa_2stage_(uplo, n, nrhs, &a[a_offset], lda, &tb[1], ltb, &
 		ipiv[1], &ipiv2[1], &b[b_offset], ldb, info);

    }

    work[1].r = (doublereal) lwkopt, work[1].i = 0.;

    return 0;

 /*     End of ZHESV_AA_2STAGE */

 } /* zhesv_aa_2stage__ */

--- a/lapack-netlib/SRC/zhesv_rk.c
+++ b/lapack-netlib/SRC/zhesv_rk.c
@@ -0,0 +1,719 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c_n1 = -1;

 /* > \brief <b> ZHESV_RK computes the solution to system of linear equations A * X = B for SY matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHESV_RK + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhesv_r
 k.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhesv_r
 k.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhesv_r
 k.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHESV_RK( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, */
 /*                            WORK, LWORK, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, LDB, LWORK, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ), E( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > ZHESV_RK computes the solution to a complex system of linear */
 /* > equations A * X = B, where A is an N-by-N Hermitian matrix */
 /* > and X and B are N-by-NRHS matrices. */
 /* > */
 /* > The bounded Bunch-Kaufman (rook) diagonal pivoting method is used */
 /* > to factor A as */
 /* >    A = P*U*D*(U**H)*(P**T),  if UPLO = 'U', or */
 /* >    A = P*L*D*(L**H)*(P**T),  if UPLO = 'L', */
 /* > where U (or L) is unit upper (or lower) triangular matrix, */
 /* > U**H (or L**H) is the conjugate of U (or L), P is a permutation */
 /* > matrix, P**T is the transpose of P, and D is Hermitian and block */
 /* > diagonal with 1-by-1 and 2-by-2 diagonal blocks. */
 /* > */
 /* > ZHETRF_RK is called to compute the factorization of a complex */
 /* > Hermitian matrix.  The factored form of A is then used to solve */
 /* > the system of equations A * X = B by calling BLAS3 routine ZHETRS_3. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          Specifies whether the upper or lower triangular part of the */
 /* >          Hermitian matrix A is stored: */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of linear equations, i.e., the order of the */
 /* >          matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A. */
 /* >            If UPLO = 'U': the leading N-by-N upper triangular part */
 /* >            of A contains the upper triangular part of the matrix A, */
 /* >            and the strictly lower triangular part of A is not */
 /* >            referenced. */
 /* > */
 /* >            If UPLO = 'L': the leading N-by-N lower triangular part */
 /* >            of A contains the lower triangular part of the matrix A, */
 /* >            and the strictly upper triangular part of A is not */
 /* >            referenced. */
 /* > */
 /* >          On exit, if INFO = 0, diagonal of the block diagonal */
 /* >          matrix D and factors U or L  as computed by ZHETRF_RK: */
 /* >            a) ONLY diagonal elements of the Hermitian block diagonal */
 /* >               matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); */
 /* >               (superdiagonal (or subdiagonal) elements of D */
 /* >                are stored on exit in array E), and */
 /* >            b) If UPLO = 'U': factor U in the superdiagonal part of A. */
 /* >               If UPLO = 'L': factor L in the subdiagonal part of A. */
 /* > */
 /* >          For more info see the description of ZHETRF_RK routine. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] E */
 /* > \verbatim */
 /* >          E is COMPLEX*16 array, dimension (N) */
 /* >          On exit, contains the output computed by the factorization */
 /* >          routine ZHETRF_RK, i.e. the superdiagonal (or subdiagonal) */
 /* >          elements of the Hermitian block diagonal matrix D */
 /* >          with 1-by-1 or 2-by-2 diagonal blocks, where */
 /* >          If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; */
 /* >          If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. */
 /* > */
 /* >          NOTE: For 1-by-1 diagonal block D(k), where */
 /* >          1 <= k <= N, the element E(k) is set to 0 in both */
 /* >          UPLO = 'U' or UPLO = 'L' cases. */
 /* > */
 /* >          For more info see the description of ZHETRF_RK routine. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          Details of the interchanges and the block structure of D, */
 /* >          as determined by ZHETRF_RK. */
 /* > */
 /* >          For more info see the description of ZHETRF_RK routine. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,NRHS) */
 /* >          On entry, the N-by-NRHS right hand side matrix B. */
 /* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension ( MAX(1,LWORK) ). */
 /* >          Work array used in the factorization stage. */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of WORK.  LWORK >= 1. For best performance */
 /* >          of factorization stage LWORK >= f2cmax(1,N*NB), where NB is */
 /* >          the optimal blocksize for ZHETRF_RK. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; */
 /* >          the routine only calculates the optimal size of the WORK */
 /* >          array for factorization stage, returns this value as */
 /* >          the first entry of the WORK array, and no error message */
 /* >          related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* > */
 /* >          < 0: If INFO = -k, the k-th argument had an illegal value */
 /* > */
 /* >          > 0: If INFO = k, the matrix A is singular, because: */
 /* >                 If UPLO = 'U': column k in the upper */
 /* >                 triangular part of A contains all zeros. */
 /* >                 If UPLO = 'L': column k in the lower */
 /* >                 triangular part of A contains all zeros. */
 /* > */
 /* >               Therefore D(k,k) is exactly zero, and superdiagonal */
 /* >               elements of column k of U (or subdiagonal elements of */
 /* >               column k of L ) are all zeros. The factorization has */
 /* >               been completed, but the block diagonal matrix D is */
 /* >               exactly singular, and division by zero will occur if */
 /* >               it is used to solve a system of equations. */
 /* > */
 /* >               NOTE: INFO only stores the first occurrence of */
 /* >               a singularity, any subsequent occurrence of singularity */
 /* >               is not stored in INFO even though the factorization */
 /* >               always completes. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16HEsolve */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  December 2016,  Igor Kozachenko, */
 /* >                  Computer Science Division, */
 /* >                  University of California, Berkeley */
 /* > */
 /* >  September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */
 /* >                  School of Mathematics, */
 /* >                  University of Manchester */
 /* > */
 /* > \endverbatim */

 /*  ===================================================================== */
 /* Subroutine */ int zhesv_rk_(char *uplo, integer *n, integer *nrhs, 
 	doublecomplex *a, integer *lda, doublecomplex *e, integer *ipiv, 
 	doublecomplex *b, integer *ldb, doublecomplex *work, integer *lwork, 
 	integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;

    /* Local variables */
    extern /* Subroutine */ int zhetrs_3_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int zhetrf_rk_(char *, integer *, doublecomplex *
 	    , integer *, doublecomplex *, integer *, doublecomplex *, integer 
 	    *, integer *), xerbla_(char *, integer *, ftnlen);
    integer lwkopt;
    logical lquery;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --e;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;

    /* Function Body */
    *info = 0;
    lquery = *lwork == -1;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -9;
    } else if (*lwork < 1 && ! lquery) {
 	*info = -11;
    }

    if (*info == 0) {
 	if (*n == 0) {
 	    lwkopt = 1;
 	} else {
 	    zhetrf_rk_(uplo, n, &a[a_offset], lda, &e[1], &ipiv[1], &work[1],
 		     &c_n1, info);
 	    lwkopt = (integer) work[1].r;
 	}
 	work[1].r = (doublereal) lwkopt, work[1].i = 0.;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHESV_RK ", &i__1, (ftnlen)9);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Compute the factorization A = P*U*D*(U**H)*(P**T) or */
 /*     A = P*U*D*(U**H)*(P**T). */

    zhetrf_rk_(uplo, n, &a[a_offset], lda, &e[1], &ipiv[1], &work[1], lwork, 
 	    info);

    if (*info == 0) {

 /*        Solve the system A*X = B with BLAS3 solver, overwriting B with X. */

 	zhetrs_3_(uplo, n, nrhs, &a[a_offset], lda, &e[1], &ipiv[1], &b[
 		b_offset], ldb, info);

    }

    work[1].r = (doublereal) lwkopt, work[1].i = 0.;

    return 0;

 /*     End of ZHESV_RK */

 } /* zhesv_rk__ */

--- a/lapack-netlib/SRC/zhesv_rook.c
+++ b/lapack-netlib/SRC/zhesv_rook.c
@@ -0,0 +1,699 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b ZHESV_ROOK computes the solution to a system of linear equations A * X = B for HE matrices usin
 g the bounded Bunch-Kaufman ("rook") diagonal pivoting method */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHESV_ROOK + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhesv_r
 ook.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhesv_r
 ook.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhesv_r
 ook.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHESV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, */
 /*                              LWORK, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, LDB, LWORK, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHESV_ROOK computes the solution to a complex system of linear equations */
 /* >    A * X = B, */
 /* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */
 /* > matrices. */
 /* > */
 /* > The bounded Bunch-Kaufman ("rook") diagonal pivoting method is used */
 /* > to factor A as */
 /* >    A = U * D * U**T,  if UPLO = 'U', or */
 /* >    A = L * D * L**T,  if UPLO = 'L', */
 /* > where U (or L) is a product of permutation and unit upper (lower) */
 /* > triangular matrices, and D is Hermitian and block diagonal with */
 /* > 1-by-1 and 2-by-2 diagonal blocks. */
 /* > */
 /* > ZHETRF_ROOK is called to compute the factorization of a complex */
 /* > Hermition matrix A using the bounded Bunch-Kaufman ("rook") diagonal */
 /* > pivoting method. */
 /* > */
 /* > The factored form of A is then used to solve the system */
 /* > of equations A * X = B by calling ZHETRS_ROOK (uses BLAS 2). */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of linear equations, i.e., the order of the */
 /* >          matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          N-by-N upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading N-by-N lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* > */
 /* >          On exit, if INFO = 0, the block diagonal matrix D and the */
 /* >          multipliers used to obtain the factor U or L from the */
 /* >          factorization A = U*D*U**H or A = L*D*L**H as computed by */
 /* >          ZHETRF_ROOK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          Details of the interchanges and the block structure of D. */
 /* > */
 /* >          If UPLO = 'U': */
 /* >             Only the last KB elements of IPIV are set. */
 /* > */
 /* >             If IPIV(k) > 0, then rows and columns k and IPIV(k) were */
 /* >             interchanged and D(k,k) is a 1-by-1 diagonal block. */
 /* > */
 /* >             If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and */
 /* >             columns k and -IPIV(k) were interchanged and rows and */
 /* >             columns k-1 and -IPIV(k-1) were inerchaged, */
 /* >             D(k-1:k,k-1:k) is a 2-by-2 diagonal block. */
 /* > */
 /* >          If UPLO = 'L': */
 /* >             Only the first KB elements of IPIV are set. */
 /* > */
 /* >             If IPIV(k) > 0, then rows and columns k and IPIV(k) */
 /* >             were interchanged and D(k,k) is a 1-by-1 diagonal block. */
 /* > */
 /* >             If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and */
 /* >             columns k and -IPIV(k) were interchanged and rows and */
 /* >             columns k+1 and -IPIV(k+1) were inerchaged, */
 /* >             D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,NRHS) */
 /* >          On entry, the N-by-NRHS right hand side matrix B. */
 /* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of WORK.  LWORK >= 1, and for best performance */
 /* >          LWORK >= f2cmax(1,N*NB), where NB is the optimal blocksize for */
 /* >          ZHETRF_ROOK. */
 /* >          for LWORK < N, TRS will be done with Level BLAS 2 */
 /* >          for LWORK >= N, TRS will be done with Level BLAS 3 */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization */
 /* >               has been completed, but the block diagonal matrix D is */
 /* >               exactly singular, so the solution could not be computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2013 */

 /* > \ingroup complex16HEsolve */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  November 2013,  Igor Kozachenko, */
 /* >                  Computer Science Division, */
 /* >                  University of California, Berkeley */
 /* > */
 /* >  September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, */
 /* >                  School of Mathematics, */
 /* >                  University of Manchester */
 /* > */
 /* > \endverbatim */


 /*  ===================================================================== */
 /* Subroutine */ int zhesv_rook_(char *uplo, integer *n, integer *nrhs, 
 	doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, 
 	integer *ldb, doublecomplex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;

    /* Local variables */
    extern /* Subroutine */ int zhetrs_rook_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, integer *, doublecomplex *, integer *,
 	     integer *);
    extern logical lsame_(char *, char *);
    integer nb;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int zhetrf_rook_(char *, integer *, 
 	    doublecomplex *, integer *, integer *, doublecomplex *, integer *,
 	     integer *);


 /*  -- LAPACK driver routine (version 3.5.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2013 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;

    /* Function Body */
    *info = 0;
    lquery = *lwork == -1;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
    } else if (*lwork < 1 && ! lquery) {
 	*info = -10;
    }

    if (*info == 0) {
 	if (*n == 0) {
 	    lwkopt = 1;
 	} else {
 	    nb = ilaenv_(&c__1, "ZHETRF_ROOK", uplo, n, &c_n1, &c_n1, &c_n1, (
 		    ftnlen)11, (ftnlen)1);
 	    lwkopt = *n * nb;
 	}
 	work[1].r = (doublereal) lwkopt, work[1].i = 0.;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHESV_ROOK ", &i__1, (ftnlen)11);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Compute the factorization A = U*D*U**H or A = L*D*L**H. */

    zhetrf_rook_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info);
    if (*info == 0) {

 /*        Solve the system A*X = B, overwriting B with X. */

 /*        Solve with TRS ( Use Level BLAS 2) */

 	zhetrs_rook_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset]
 		, ldb, info);

    }

    work[1].r = (doublereal) lwkopt, work[1].i = 0.;

    return 0;

 /*     End of ZHESV_ROOK */

 } /* zhesv_rook__ */

--- a/lapack-netlib/SRC/zhesvx.c
+++ b/lapack-netlib/SRC/zhesvx.c
@@ -0,0 +1,844 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief <b> ZHESVX computes the solution to system of linear equations A * X = B for HE matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHESVX + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhesvx.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhesvx.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhesvx.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHESVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, */
 /*                          LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, */
 /*                          RWORK, INFO ) */

 /*       CHARACTER          FACT, UPLO */
 /*       INTEGER            INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS */
 /*       DOUBLE PRECISION   RCOND */
 /*       INTEGER            IPIV( * ) */
 /*       DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * ) */
 /*       COMPLEX*16         A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */
 /*      $                   WORK( * ), X( LDX, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHESVX uses the diagonal pivoting factorization to compute the */
 /* > solution to a complex system of linear equations A * X = B, */
 /* > where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS */
 /* > matrices. */
 /* > */
 /* > Error bounds on the solution and a condition estimate are also */
 /* > provided. */
 /* > \endverbatim */

 /* > \par Description: */
 /*  ================= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > The following steps are performed: */
 /* > */
 /* > 1. If FACT = 'N', the diagonal pivoting method is used to factor A. */
 /* >    The form of the factorization is */
 /* >       A = U * D * U**H,  if UPLO = 'U', or */
 /* >       A = L * D * L**H,  if UPLO = 'L', */
 /* >    where U (or L) is a product of permutation and unit upper (lower) */
 /* >    triangular matrices, and D is Hermitian and block diagonal with */
 /* >    1-by-1 and 2-by-2 diagonal blocks. */
 /* > */
 /* > 2. If some D(i,i)=0, so that D is exactly singular, then the routine */
 /* >    returns with INFO = i. Otherwise, the factored form of A is used */
 /* >    to estimate the condition number of the matrix A.  If the */
 /* >    reciprocal of the condition number is less than machine precision, */
 /* >    INFO = N+1 is returned as a warning, but the routine still goes on */
 /* >    to solve for X and compute error bounds as described below. */
 /* > */
 /* > 3. The system of equations is solved for X using the factored form */
 /* >    of A. */
 /* > */
 /* > 4. Iterative refinement is applied to improve the computed solution */
 /* >    matrix and calculate error bounds and backward error estimates */
 /* >    for it. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] FACT */
 /* > \verbatim */
 /* >          FACT is CHARACTER*1 */
 /* >          Specifies whether or not the factored form of A has been */
 /* >          supplied on entry. */
 /* >          = 'F':  On entry, AF and IPIV contain the factored form */
 /* >                  of A.  A, AF and IPIV will not be modified. */
 /* >          = 'N':  The matrix A will be copied to AF and factored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of linear equations, i.e., the order of the */
 /* >          matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrices B and X.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          The Hermitian matrix A.  If UPLO = 'U', the leading N-by-N */
 /* >          upper triangular part of A contains the upper triangular part */
 /* >          of the matrix A, and the strictly lower triangular part of A */
 /* >          is not referenced.  If UPLO = 'L', the leading N-by-N lower */
 /* >          triangular part of A contains the lower triangular part of */
 /* >          the matrix A, and the strictly upper triangular part of A is */
 /* >          not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AF */
 /* > \verbatim */
 /* >          AF is COMPLEX*16 array, dimension (LDAF,N) */
 /* >          If FACT = 'F', then AF is an input argument and on entry */
 /* >          contains the block diagonal matrix D and the multipliers used */
 /* >          to obtain the factor U or L from the factorization */
 /* >          A = U*D*U**H or A = L*D*L**H as computed by ZHETRF. */
 /* > */
 /* >          If FACT = 'N', then AF is an output argument and on exit */
 /* >          returns the block diagonal matrix D and the multipliers used */
 /* >          to obtain the factor U or L from the factorization */
 /* >          A = U*D*U**H or A = L*D*L**H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAF */
 /* > \verbatim */
 /* >          LDAF is INTEGER */
 /* >          The leading dimension of the array AF.  LDAF >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          If FACT = 'F', then IPIV is an input argument and on entry */
 /* >          contains details of the interchanges and the block structure */
 /* >          of D, as determined by ZHETRF. */
 /* >          If IPIV(k) > 0, then rows and columns k and IPIV(k) were */
 /* >          interchanged and D(k,k) is a 1-by-1 diagonal block. */
 /* >          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */
 /* >          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */
 /* >          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) = */
 /* >          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */
 /* >          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */
 /* > */
 /* >          If FACT = 'N', then IPIV is an output argument and on exit */
 /* >          contains details of the interchanges and the block structure */
 /* >          of D, as determined by ZHETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] B */
 /* > \verbatim */
 /* >          B is COMPLEX*16 array, dimension (LDB,NRHS) */
 /* >          The N-by-NRHS right hand side matrix B. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] X */
 /* > \verbatim */
 /* >          X is COMPLEX*16 array, dimension (LDX,NRHS) */
 /* >          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDX */
 /* > \verbatim */
 /* >          LDX is INTEGER */
 /* >          The leading dimension of the array X.  LDX >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is DOUBLE PRECISION */
 /* >          The estimate of the reciprocal condition number of the matrix */
 /* >          A.  If RCOND is less than the machine precision (in */
 /* >          particular, if RCOND = 0), the matrix is singular to working */
 /* >          precision.  This condition is indicated by a return code of */
 /* >          INFO > 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] FERR */
 /* > \verbatim */
 /* >          FERR is DOUBLE PRECISION array, dimension (NRHS) */
 /* >          The estimated forward error bound for each solution vector */
 /* >          X(j) (the j-th column of the solution matrix X). */
 /* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
 /* >          is an estimated upper bound for the magnitude of the largest */
 /* >          element in (X(j) - XTRUE) divided by the magnitude of the */
 /* >          largest element in X(j).  The estimate is as reliable as */
 /* >          the estimate for RCOND, and is almost always a slight */
 /* >          overestimate of the true error. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] BERR */
 /* > \verbatim */
 /* >          BERR is DOUBLE PRECISION array, dimension (NRHS) */
 /* >          The componentwise relative backward error of each solution */
 /* >          vector X(j) (i.e., the smallest relative change in */
 /* >          any element of A or B that makes X(j) an exact solution). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of WORK.  LWORK >= f2cmax(1,2*N), and for best */
 /* >          performance, when FACT = 'N', LWORK >= f2cmax(1,2*N,N*NB), where */
 /* >          NB is the optimal blocksize for ZHETRF. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RWORK */
 /* > \verbatim */
 /* >          RWORK is DOUBLE PRECISION array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0: if INFO = i, and i is */
 /* >                <= N:  D(i,i) is exactly zero.  The factorization */
 /* >                       has been completed but the factor D is exactly */
 /* >                       singular, so the solution and error bounds could */
 /* >                       not be computed. RCOND = 0 is returned. */
 /* >                = N+1: D is nonsingular, but RCOND is less than machine */
 /* >                       precision, meaning that the matrix is singular */
 /* >                       to working precision.  Nevertheless, the */
 /* >                       solution and error bounds are computed because */
 /* >                       there are a number of situations where the */
 /* >                       computed solution can be more accurate than the */
 /* >                       value of RCOND would suggest. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date April 2012 */

 /* > \ingroup complex16HEsolve */

 /*  ===================================================================== */
 /* Subroutine */ int zhesvx_(char *fact, char *uplo, integer *n, integer *
 	nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer *
 	ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x,
 	 integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, 
 	doublecomplex *work, integer *lwork, doublereal *rwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
 	    x_offset, i__1, i__2;

    /* Local variables */
    extern logical lsame_(char *, char *);
    doublereal anorm;
    integer nb;
    extern doublereal dlamch_(char *);
    logical nofact;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, 
 	    integer *, doublereal *);
    extern /* Subroutine */ int zhecon_(char *, integer *, doublecomplex *, 
 	    integer *, integer *, doublereal *, doublereal *, doublecomplex *,
 	     integer *), zherfs_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, integer *,
 	     doublecomplex *, integer *, doublecomplex *, integer *, 
 	    doublereal *, doublereal *, doublecomplex *, doublereal *, 
 	    integer *), zhetrf_(char *, integer *, doublecomplex *, 
 	    integer *, integer *, doublecomplex *, integer *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, 
 	    integer *, doublecomplex *, integer *), zhetrs_(char *, 
 	    integer *, integer *, doublecomplex *, integer *, integer *, 
 	    doublecomplex *, integer *, integer *);
    integer lwkopt;
    logical lquery;


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     April 2012 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    af_dim1 = *ldaf;
    af_offset = 1 + af_dim1 * 1;
    af -= af_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --rwork;

    /* Function Body */
    *info = 0;
    nofact = lsame_(fact, "N");
    lquery = *lwork == -1;
    if (! nofact && ! lsame_(fact, "F")) {
 	*info = -1;
    } else if (! lsame_(uplo, "U") && ! lsame_(uplo, 
 	    "L")) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*nrhs < 0) {
 	*info = -4;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -6;
    } else if (*ldaf < f2cmax(1,*n)) {
 	*info = -8;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -11;
    } else if (*ldx < f2cmax(1,*n)) {
 	*info = -13;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = 1, i__2 = *n << 1;
 	if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
 	    *info = -18;
 	}
    }

    if (*info == 0) {
 /* Computing MAX */
 	i__1 = 1, i__2 = *n << 1;
 	lwkopt = f2cmax(i__1,i__2);
 	if (nofact) {
 	    nb = ilaenv_(&c__1, "ZHETRF", uplo, n, &c_n1, &c_n1, &c_n1, (
 		    ftnlen)6, (ftnlen)1);
 /* Computing MAX */
 	    i__1 = lwkopt, i__2 = *n * nb;
 	    lwkopt = f2cmax(i__1,i__2);
 	}
 	work[1].r = (doublereal) lwkopt, work[1].i = 0.;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHESVX", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

    if (nofact) {

 /*        Compute the factorization A = U*D*U**H or A = L*D*L**H. */

 	zlacpy_(uplo, n, n, &a[a_offset], lda, &af[af_offset], ldaf);
 	zhetrf_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &work[1], lwork, 
 		info);

 /*        Return if INFO is non-zero. */

 	if (*info > 0) {
 	    *rcond = 0.;
 	    return 0;
 	}
    }

 /*     Compute the norm of the matrix A. */

    anorm = zlanhe_("I", uplo, n, &a[a_offset], lda, &rwork[1]);

 /*     Compute the reciprocal of the condition number of A. */

    zhecon_(uplo, n, &af[af_offset], ldaf, &ipiv[1], &anorm, rcond, &work[1], 
 	    info);

 /*     Compute the solution vectors X. */

    zlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx);
    zhetrs_(uplo, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, 
 	    info);

 /*     Use iterative refinement to improve the computed solutions and */
 /*     compute error bounds and backward error estimates for them. */

    zherfs_(uplo, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &ipiv[1], 
 	    &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[1]
 	    , &rwork[1], info);

 /*     Set INFO = N+1 if the matrix is singular to working precision. */

    if (*rcond < dlamch_("Epsilon")) {
 	*info = *n + 1;
    }

    work[1].r = (doublereal) lwkopt, work[1].i = 0.;

    return 0;

 /*     End of ZHESVX */

 } /* zhesvx_ */

--- a/lapack-netlib/SRC/zhesvxx.c
+++ b/lapack-netlib/SRC/zhesvxx.c
--- a/lapack-netlib/SRC/zheswapr.c
+++ b/lapack-netlib/SRC/zheswapr.c
@@ -0,0 +1,630 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b ZHESWAPR applies an elementary permutation on the rows and columns of a Hermitian matrix. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHESWAPR + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheswap
 r.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheswap
 r.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheswap
 r.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHESWAPR( UPLO, N, A, LDA, I1, I2) */

 /*       CHARACTER        UPLO */
 /*       INTEGER          I1, I2, LDA, N */
 /*       COMPLEX*16          A( LDA, N ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHESWAPR applies an elementary permutation on the rows and the columns of */
 /* > a hermitian matrix. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          Specifies whether the details of the factorization are stored */
 /* >          as an upper or lower triangular matrix. */
 /* >          = 'U':  Upper triangular, form is A = U*D*U**T; */
 /* >          = 'L':  Lower triangular, form is A = L*D*L**T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the NB diagonal matrix D and the multipliers */
 /* >          used to obtain the factor U or L as computed by CSYTRF. */
 /* > */
 /* >          On exit, if INFO = 0, the (symmetric) inverse of the original */
 /* >          matrix.  If UPLO = 'U', the upper triangular part of the */
 /* >          inverse is formed and the part of A below the diagonal is not */
 /* >          referenced; if UPLO = 'L' the lower triangular part of the */
 /* >          inverse is formed and the part of A above the diagonal is */
 /* >          not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] I1 */
 /* > \verbatim */
 /* >          I1 is INTEGER */
 /* >          Index of the first row to swap */
 /* > \endverbatim */
 /* > */
 /* > \param[in] I2 */
 /* > \verbatim */
 /* >          I2 is INTEGER */
 /* >          Index of the second row to swap */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16HEauxiliary */

 /*  ===================================================================== */
 /* Subroutine */ int zheswapr_(char *uplo, integer *n, doublecomplex *a, 
 	integer *lda, integer *i1, integer *i2)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    doublecomplex z__1;

    /* Local variables */
    integer i__;
    extern logical lsame_(char *, char *);
    logical upper;
    extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *);
    doublecomplex tmp;


 /*  -- LAPACK auxiliary routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */



    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;

    /* Function Body */
    upper = lsame_(uplo, "U");
    if (upper) {

 /*         UPPER */
 /*         first swap */
 /*          - swap column I1 and I2 from I1 to I1-1 */
 	i__1 = *i1 - 1;
 	zswap_(&i__1, &a[*i1 * a_dim1 + 1], &c__1, &a[*i2 * a_dim1 + 1], &
 		c__1);

 /*          second swap : */
 /*          - swap A(I1,I1) and A(I2,I2) */
 /*          - swap row I1 from I1+1 to I2-1 with col I2 from I1+1 to I2-1 */
 /*          - swap A(I2,I1) and A(I1,I2) */
 	i__1 = *i1 + *i1 * a_dim1;
 	tmp.r = a[i__1].r, tmp.i = a[i__1].i;
 	i__1 = *i1 + *i1 * a_dim1;
 	i__2 = *i2 + *i2 * a_dim1;
 	a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i;
 	i__1 = *i2 + *i2 * a_dim1;
 	a[i__1].r = tmp.r, a[i__1].i = tmp.i;

 	i__1 = *i2 - *i1 - 1;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = *i1 + (*i1 + i__) * a_dim1;
 	    tmp.r = a[i__2].r, tmp.i = a[i__2].i;
 	    i__2 = *i1 + (*i1 + i__) * a_dim1;
 	    d_cnjg(&z__1, &a[*i1 + i__ + *i2 * a_dim1]);
 	    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
 	    i__2 = *i1 + i__ + *i2 * a_dim1;
 	    d_cnjg(&z__1, &tmp);
 	    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
 	}

 	i__1 = *i1 + *i2 * a_dim1;
 	d_cnjg(&z__1, &a[*i1 + *i2 * a_dim1]);
 	a[i__1].r = z__1.r, a[i__1].i = z__1.i;

 /*          third swap */
 /*          - swap row I1 and I2 from I2+1 to N */
 	i__1 = *n;
 	for (i__ = *i2 + 1; i__ <= i__1; ++i__) {
 	    i__2 = *i1 + i__ * a_dim1;
 	    tmp.r = a[i__2].r, tmp.i = a[i__2].i;
 	    i__2 = *i1 + i__ * a_dim1;
 	    i__3 = *i2 + i__ * a_dim1;
 	    a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i;
 	    i__2 = *i2 + i__ * a_dim1;
 	    a[i__2].r = tmp.r, a[i__2].i = tmp.i;
 	}

    } else {

 /*         LOWER */
 /*         first swap */
 /*          - swap row I1 and I2 from 1 to I1-1 */
 	i__1 = *i1 - 1;
 	zswap_(&i__1, &a[*i1 + a_dim1], lda, &a[*i2 + a_dim1], lda);

 /*         second swap : */
 /*          - swap A(I1,I1) and A(I2,I2) */
 /*          - swap col I1 from I1+1 to I2-1 with row I2 from I1+1 to I2-1 */
 /*          - swap A(I2,I1) and A(I1,I2) */
 	i__1 = *i1 + *i1 * a_dim1;
 	tmp.r = a[i__1].r, tmp.i = a[i__1].i;
 	i__1 = *i1 + *i1 * a_dim1;
 	i__2 = *i2 + *i2 * a_dim1;
 	a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i;
 	i__1 = *i2 + *i2 * a_dim1;
 	a[i__1].r = tmp.r, a[i__1].i = tmp.i;

 	i__1 = *i2 - *i1 - 1;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = *i1 + i__ + *i1 * a_dim1;
 	    tmp.r = a[i__2].r, tmp.i = a[i__2].i;
 	    i__2 = *i1 + i__ + *i1 * a_dim1;
 	    d_cnjg(&z__1, &a[*i2 + (*i1 + i__) * a_dim1]);
 	    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
 	    i__2 = *i2 + (*i1 + i__) * a_dim1;
 	    d_cnjg(&z__1, &tmp);
 	    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
 	}

 	i__1 = *i2 + *i1 * a_dim1;
 	d_cnjg(&z__1, &a[*i2 + *i1 * a_dim1]);
 	a[i__1].r = z__1.r, a[i__1].i = z__1.i;

 /*         third swap */
 /*          - swap col I1 and I2 from I2+1 to N */
 	i__1 = *n;
 	for (i__ = *i2 + 1; i__ <= i__1; ++i__) {
 	    i__2 = i__ + *i1 * a_dim1;
 	    tmp.r = a[i__2].r, tmp.i = a[i__2].i;
 	    i__2 = i__ + *i1 * a_dim1;
 	    i__3 = i__ + *i2 * a_dim1;
 	    a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i;
 	    i__2 = i__ + *i2 * a_dim1;
 	    a[i__2].r = tmp.r, a[i__2].i = tmp.i;
 	}

    }
    return 0;
 } /* zheswapr_ */

--- a/lapack-netlib/SRC/zhetd2.c
+++ b/lapack-netlib/SRC/zhetd2.c
@@ -0,0 +1,796 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b2 = {0.,0.};
 static integer c__1 = 1;

 /* > \brief \b ZHETD2 reduces a Hermitian matrix to real symmetric tridiagonal form by an unitary similarity t
 ransformation (unblocked algorithm). */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHETD2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetd2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetd2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetd2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHETD2( UPLO, N, A, LDA, D, E, TAU, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, N */
 /*       DOUBLE PRECISION   D( * ), E( * ) */
 /*       COMPLEX*16         A( LDA, * ), TAU( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHETD2 reduces a complex Hermitian matrix A to real symmetric */
 /* > tridiagonal form T by a unitary similarity transformation: */
 /* > Q**H * A * Q = T. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          Specifies whether the upper or lower triangular part of the */
 /* >          Hermitian matrix A is stored: */
 /* >          = 'U':  Upper triangular */
 /* >          = 'L':  Lower triangular */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          n-by-n upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading n-by-n lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* >          On exit, if UPLO = 'U', the diagonal and first superdiagonal */
 /* >          of A are overwritten by the corresponding elements of the */
 /* >          tridiagonal matrix T, and the elements above the first */
 /* >          superdiagonal, with the array TAU, represent the unitary */
 /* >          matrix Q as a product of elementary reflectors; if UPLO */
 /* >          = 'L', the diagonal and first subdiagonal of A are over- */
 /* >          written by the corresponding elements of the tridiagonal */
 /* >          matrix T, and the elements below the first subdiagonal, with */
 /* >          the array TAU, represent the unitary matrix Q as a product */
 /* >          of elementary reflectors. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] D */
 /* > \verbatim */
 /* >          D is DOUBLE PRECISION array, dimension (N) */
 /* >          The diagonal elements of the tridiagonal matrix T: */
 /* >          D(i) = A(i,i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] E */
 /* > \verbatim */
 /* >          E is DOUBLE PRECISION array, dimension (N-1) */
 /* >          The off-diagonal elements of the tridiagonal matrix T: */
 /* >          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is COMPLEX*16 array, dimension (N-1) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16HEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  If UPLO = 'U', the matrix Q is represented as a product of elementary */
 /* >  reflectors */
 /* > */
 /* >     Q = H(n-1) . . . H(2) H(1). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**H */
 /* > */
 /* >  where tau is a complex scalar, and v is a complex vector with */
 /* >  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */
 /* >  A(1:i-1,i+1), and tau in TAU(i). */
 /* > */
 /* >  If UPLO = 'L', the matrix Q is represented as a product of elementary */
 /* >  reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(n-1). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**H */
 /* > */
 /* >  where tau is a complex scalar, and v is a complex vector with */
 /* >  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */
 /* >  and tau in TAU(i). */
 /* > */
 /* >  The contents of A on exit are illustrated by the following examples */
 /* >  with n = 5: */
 /* > */
 /* >  if UPLO = 'U':                       if UPLO = 'L': */
 /* > */
 /* >    (  d   e   v2  v3  v4 )              (  d                  ) */
 /* >    (      d   e   v3  v4 )              (  e   d              ) */
 /* >    (          d   e   v4 )              (  v1  e   d          ) */
 /* >    (              d   e  )              (  v1  v2  e   d      ) */
 /* >    (                  d  )              (  v1  v2  v3  e   d  ) */
 /* > */
 /* >  where d and e denote diagonal and off-diagonal elements of T, and vi */
 /* >  denotes an element of the vector defining H(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zhetd2_(char *uplo, integer *n, doublecomplex *a, 
 	integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, 
 	integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    doublereal d__1;
    doublecomplex z__1, z__2, z__3, z__4;

    /* Local variables */
    doublecomplex taui;
    extern /* Subroutine */ int zher2_(char *, integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *);
    integer i__;
    doublecomplex alpha;
    extern logical lsame_(char *, char *);
    extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *);
    extern /* Subroutine */ int zhemv_(char *, integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *);
    logical upper;
    extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(
 	    char *, integer *, ftnlen), zlarfg_(integer *, doublecomplex *, 
 	    doublecomplex *, integer *, doublecomplex *);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --d__;
    --e;
    --tau;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHETD2", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    if (*n <= 0) {
 	return 0;
    }

    if (upper) {

 /*        Reduce the upper triangle of A */

 	i__1 = *n + *n * a_dim1;
 	i__2 = *n + *n * a_dim1;
 	d__1 = a[i__2].r;
 	a[i__1].r = d__1, a[i__1].i = 0.;
 	for (i__ = *n - 1; i__ >= 1; --i__) {

 /*           Generate elementary reflector H(i) = I - tau * v * v**H */
 /*           to annihilate A(1:i-1,i+1) */

 	    i__1 = i__ + (i__ + 1) * a_dim1;
 	    alpha.r = a[i__1].r, alpha.i = a[i__1].i;
 	    zlarfg_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui);
 	    i__1 = i__;
 	    e[i__1] = alpha.r;

 	    if (taui.r != 0. || taui.i != 0.) {

 /*              Apply H(i) from both sides to A(1:i,1:i) */

 		i__1 = i__ + (i__ + 1) * a_dim1;
 		a[i__1].r = 1., a[i__1].i = 0.;

 /*              Compute  x := tau * A * v  storing x in TAU(1:i) */

 		zhemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * 
 			a_dim1 + 1], &c__1, &c_b2, &tau[1], &c__1);

 /*              Compute  w := x - 1/2 * tau * (x**H * v) * v */

 		z__3.r = -.5, z__3.i = 0.;
 		z__2.r = z__3.r * taui.r - z__3.i * taui.i, z__2.i = z__3.r * 
 			taui.i + z__3.i * taui.r;
 		zdotc_(&z__4, &i__, &tau[1], &c__1, &a[(i__ + 1) * a_dim1 + 1]
 			, &c__1);
 		z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * 
 			z__4.i + z__2.i * z__4.r;
 		alpha.r = z__1.r, alpha.i = z__1.i;
 		zaxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[
 			1], &c__1);

 /*              Apply the transformation as a rank-2 update: */
 /*                 A := A - v * w**H - w * v**H */

 		z__1.r = -1., z__1.i = 0.;
 		zher2_(uplo, &i__, &z__1, &a[(i__ + 1) * a_dim1 + 1], &c__1, &
 			tau[1], &c__1, &a[a_offset], lda);

 	    } else {
 		i__1 = i__ + i__ * a_dim1;
 		i__2 = i__ + i__ * a_dim1;
 		d__1 = a[i__2].r;
 		a[i__1].r = d__1, a[i__1].i = 0.;
 	    }
 	    i__1 = i__ + (i__ + 1) * a_dim1;
 	    i__2 = i__;
 	    a[i__1].r = e[i__2], a[i__1].i = 0.;
 	    i__1 = i__ + 1;
 	    i__2 = i__ + 1 + (i__ + 1) * a_dim1;
 	    d__[i__1] = a[i__2].r;
 	    i__1 = i__;
 	    tau[i__1].r = taui.r, tau[i__1].i = taui.i;
 /* L10: */
 	}
 	i__1 = a_dim1 + 1;
 	d__[1] = a[i__1].r;
    } else {

 /*        Reduce the lower triangle of A */

 	i__1 = a_dim1 + 1;
 	i__2 = a_dim1 + 1;
 	d__1 = a[i__2].r;
 	a[i__1].r = d__1, a[i__1].i = 0.;
 	i__1 = *n - 1;
 	for (i__ = 1; i__ <= i__1; ++i__) {

 /*           Generate elementary reflector H(i) = I - tau * v * v**H */
 /*           to annihilate A(i+2:n,i) */

 	    i__2 = i__ + 1 + i__ * a_dim1;
 	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
 	    i__2 = *n - i__;
 /* Computing MIN */
 	    i__3 = i__ + 2;
 	    zlarfg_(&i__2, &alpha, &a[f2cmin(i__3,*n) + i__ * a_dim1], &c__1, &
 		    taui);
 	    i__2 = i__;
 	    e[i__2] = alpha.r;

 	    if (taui.r != 0. || taui.i != 0.) {

 /*              Apply H(i) from both sides to A(i+1:n,i+1:n) */

 		i__2 = i__ + 1 + i__ * a_dim1;
 		a[i__2].r = 1., a[i__2].i = 0.;

 /*              Compute  x := tau * A * v  storing y in TAU(i:n-1) */

 		i__2 = *n - i__;
 		zhemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], 
 			lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b2, &tau[
 			i__], &c__1);

 /*              Compute  w := x - 1/2 * tau * (x**H * v) * v */

 		z__3.r = -.5, z__3.i = 0.;
 		z__2.r = z__3.r * taui.r - z__3.i * taui.i, z__2.i = z__3.r * 
 			taui.i + z__3.i * taui.r;
 		i__2 = *n - i__;
 		zdotc_(&z__4, &i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ * 
 			a_dim1], &c__1);
 		z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * 
 			z__4.i + z__2.i * z__4.r;
 		alpha.r = z__1.r, alpha.i = z__1.i;
 		i__2 = *n - i__;
 		zaxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
 			i__], &c__1);

 /*              Apply the transformation as a rank-2 update: */
 /*                 A := A - v * w**H - w * v**H */

 		i__2 = *n - i__;
 		z__1.r = -1., z__1.i = 0.;
 		zher2_(uplo, &i__2, &z__1, &a[i__ + 1 + i__ * a_dim1], &c__1, 
 			&tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], 
 			lda);

 	    } else {
 		i__2 = i__ + 1 + (i__ + 1) * a_dim1;
 		i__3 = i__ + 1 + (i__ + 1) * a_dim1;
 		d__1 = a[i__3].r;
 		a[i__2].r = d__1, a[i__2].i = 0.;
 	    }
 	    i__2 = i__ + 1 + i__ * a_dim1;
 	    i__3 = i__;
 	    a[i__2].r = e[i__3], a[i__2].i = 0.;
 	    i__2 = i__;
 	    i__3 = i__ + i__ * a_dim1;
 	    d__[i__2] = a[i__3].r;
 	    i__2 = i__;
 	    tau[i__2].r = taui.r, tau[i__2].i = taui.i;
 /* L20: */
 	}
 	i__1 = *n;
 	i__2 = *n + *n * a_dim1;
 	d__[i__1] = a[i__2].r;
    }

    return 0;

 /*     End of ZHETD2 */

 } /* zhetd2_ */

--- a/lapack-netlib/SRC/zhetf2.c
+++ b/lapack-netlib/SRC/zhetf2.c
--- a/lapack-netlib/SRC/zhetf2_rk.c
+++ b/lapack-netlib/SRC/zhetf2_rk.c
--- a/lapack-netlib/SRC/zhetf2_rook.c
+++ b/lapack-netlib/SRC/zhetf2_rook.c
--- a/lapack-netlib/SRC/zhetrd.c
+++ b/lapack-netlib/SRC/zhetrd.c
@@ -0,0 +1,815 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;
 static doublereal c_b23 = 1.;

 /* > \brief \b ZHETRD */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHETRD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, LWORK, N */
 /*       DOUBLE PRECISION   D( * ), E( * ) */
 /*       COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHETRD reduces a complex Hermitian matrix A to real symmetric */
 /* > tridiagonal form T by a unitary similarity transformation: */
 /* > Q**H * A * Q = T. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          N-by-N upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading N-by-N lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* >          On exit, if UPLO = 'U', the diagonal and first superdiagonal */
 /* >          of A are overwritten by the corresponding elements of the */
 /* >          tridiagonal matrix T, and the elements above the first */
 /* >          superdiagonal, with the array TAU, represent the unitary */
 /* >          matrix Q as a product of elementary reflectors; if UPLO */
 /* >          = 'L', the diagonal and first subdiagonal of A are over- */
 /* >          written by the corresponding elements of the tridiagonal */
 /* >          matrix T, and the elements below the first subdiagonal, with */
 /* >          the array TAU, represent the unitary matrix Q as a product */
 /* >          of elementary reflectors. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] D */
 /* > \verbatim */
 /* >          D is DOUBLE PRECISION array, dimension (N) */
 /* >          The diagonal elements of the tridiagonal matrix T: */
 /* >          D(i) = A(i,i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] E */
 /* > \verbatim */
 /* >          E is DOUBLE PRECISION array, dimension (N-1) */
 /* >          The off-diagonal elements of the tridiagonal matrix T: */
 /* >          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is COMPLEX*16 array, dimension (N-1) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK.  LWORK >= 1. */
 /* >          For optimum performance LWORK >= N*NB, where NB is the */
 /* >          optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup complex16HEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  If UPLO = 'U', the matrix Q is represented as a product of elementary */
 /* >  reflectors */
 /* > */
 /* >     Q = H(n-1) . . . H(2) H(1). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**H */
 /* > */
 /* >  where tau is a complex scalar, and v is a complex vector with */
 /* >  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */
 /* >  A(1:i-1,i+1), and tau in TAU(i). */
 /* > */
 /* >  If UPLO = 'L', the matrix Q is represented as a product of elementary */
 /* >  reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(n-1). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**H */
 /* > */
 /* >  where tau is a complex scalar, and v is a complex vector with */
 /* >  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */
 /* >  and tau in TAU(i). */
 /* > */
 /* >  The contents of A on exit are illustrated by the following examples */
 /* >  with n = 5: */
 /* > */
 /* >  if UPLO = 'U':                       if UPLO = 'L': */
 /* > */
 /* >    (  d   e   v2  v3  v4 )              (  d                  ) */
 /* >    (      d   e   v3  v4 )              (  e   d              ) */
 /* >    (          d   e   v4 )              (  v1  e   d          ) */
 /* >    (              d   e  )              (  v1  v2  e   d      ) */
 /* >    (                  d  )              (  v1  v2  v3  e   d  ) */
 /* > */
 /* >  where d and e denote diagonal and off-diagonal elements of T, and vi */
 /* >  denotes an element of the vector defining H(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zhetrd_(char *uplo, integer *n, doublecomplex *a, 
 	integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, 
 	doublecomplex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    doublecomplex z__1;

    /* Local variables */
    integer i__, j;
    extern logical lsame_(char *, char *);
    integer nbmin, iinfo;
    logical upper;
    extern /* Subroutine */ int zhetd2_(char *, integer *, doublecomplex *, 
 	    integer *, doublereal *, doublereal *, doublecomplex *, integer *), zher2k_(char *, char *, integer *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, doublereal *, doublecomplex *, integer *);
    integer nb, kk, nx;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int zlatrd_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublereal *, doublecomplex *, 
 	    doublecomplex *, integer *);
    integer ldwork, lwkopt;
    logical lquery;
    integer iws;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --d__;
    --e;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1;
    if (! upper && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    } else if (*lwork < 1 && ! lquery) {
 	*info = -9;
    }

    if (*info == 0) {

 /*        Determine the block size. */

 	nb = ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
 		 (ftnlen)1);
 	lwkopt = *n * nb;
 	work[1].r = (doublereal) lwkopt, work[1].i = 0.;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHETRD", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	work[1].r = 1., work[1].i = 0.;
 	return 0;
    }

    nx = *n;
    iws = 1;
    if (nb > 1 && nb < *n) {

 /*        Determine when to cross over from blocked to unblocked code */
 /*        (last block is always handled by unblocked code). */

 /* Computing MAX */
 	i__1 = nb, i__2 = ilaenv_(&c__3, "ZHETRD", uplo, n, &c_n1, &c_n1, &
 		c_n1, (ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);
 	if (nx < *n) {

 /*           Determine if workspace is large enough for blocked code. */

 	    ldwork = *n;
 	    iws = ldwork * nb;
 	    if (*lwork < iws) {

 /*              Not enough workspace to use optimal NB:  determine the */
 /*              minimum value of NB, and reduce NB or force use of */
 /*              unblocked code by setting NX = N. */

 /* Computing MAX */
 		i__1 = *lwork / ldwork;
 		nb = f2cmax(i__1,1);
 		nbmin = ilaenv_(&c__2, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1,
 			 (ftnlen)6, (ftnlen)1);
 		if (nb < nbmin) {
 		    nx = *n;
 		}
 	    }
 	} else {
 	    nx = *n;
 	}
    } else {
 	nb = 1;
    }

    if (upper) {

 /*        Reduce the upper triangle of A. */
 /*        Columns 1:kk are handled by the unblocked method. */

 	kk = *n - (*n - nx + nb - 1) / nb * nb;
 	i__1 = kk + 1;
 	i__2 = -nb;
 	for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 
 		i__2) {

 /*           Reduce columns i:i+nb-1 to tridiagonal form and form the */
 /*           matrix W which is needed to update the unreduced part of */
 /*           the matrix */

 	    i__3 = i__ + nb - 1;
 	    zlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], &
 		    work[1], &ldwork);

 /*           Update the unreduced submatrix A(1:i-1,1:i-1), using an */
 /*           update of the form:  A := A - V*W**H - W*V**H */

 	    i__3 = i__ - 1;
 	    z__1.r = -1., z__1.i = 0.;
 	    zher2k_(uplo, "No transpose", &i__3, &nb, &z__1, &a[i__ * a_dim1 
 		    + 1], lda, &work[1], &ldwork, &c_b23, &a[a_offset], lda);

 /*           Copy superdiagonal elements back into A, and diagonal */
 /*           elements into D */

 	    i__3 = i__ + nb - 1;
 	    for (j = i__; j <= i__3; ++j) {
 		i__4 = j - 1 + j * a_dim1;
 		i__5 = j - 1;
 		a[i__4].r = e[i__5], a[i__4].i = 0.;
 		i__4 = j;
 		i__5 = j + j * a_dim1;
 		d__[i__4] = a[i__5].r;
 /* L10: */
 	    }
 /* L20: */
 	}

 /*        Use unblocked code to reduce the last or only block */

 	zhetd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo);
    } else {

 /*        Reduce the lower triangle of A */

 	i__2 = *n - nx;
 	i__1 = nb;
 	for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {

 /*           Reduce columns i:i+nb-1 to tridiagonal form and form the */
 /*           matrix W which is needed to update the unreduced part of */
 /*           the matrix */

 	    i__3 = *n - i__ + 1;
 	    zlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], &
 		    tau[i__], &work[1], &ldwork);

 /*           Update the unreduced submatrix A(i+nb:n,i+nb:n), using */
 /*           an update of the form:  A := A - V*W**H - W*V**H */

 	    i__3 = *n - i__ - nb + 1;
 	    z__1.r = -1., z__1.i = 0.;
 	    zher2k_(uplo, "No transpose", &i__3, &nb, &z__1, &a[i__ + nb + 
 		    i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b23, &a[
 		    i__ + nb + (i__ + nb) * a_dim1], lda);

 /*           Copy subdiagonal elements back into A, and diagonal */
 /*           elements into D */

 	    i__3 = i__ + nb - 1;
 	    for (j = i__; j <= i__3; ++j) {
 		i__4 = j + 1 + j * a_dim1;
 		i__5 = j;
 		a[i__4].r = e[i__5], a[i__4].i = 0.;
 		i__4 = j;
 		i__5 = j + j * a_dim1;
 		d__[i__4] = a[i__5].r;
 /* L30: */
 	    }
 /* L40: */
 	}

 /*        Use unblocked code to reduce the last or only block */

 	i__1 = *n - i__ + 1;
 	zhetd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], 
 		&tau[i__], &iinfo);
    }

    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
    return 0;

 /*     End of ZHETRD */

 } /* zhetrd_ */

--- a/lapack-netlib/SRC/zhetrd_2stage.c
+++ b/lapack-netlib/SRC/zhetrd_2stage.c
@@ -0,0 +1,746 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__2 = 2;
 static integer c__3 = 3;
 static integer c__4 = 4;

 /* > \brief \b ZHETRD_2STAGE */

 /*  @precisions fortran z -> s d c */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHETRD_2STAGE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd_
 2stage.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd_
 2stage.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd_
 2stage.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHETRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, */
 /*                                 HOUS2, LHOUS2, WORK, LWORK, INFO ) */

 /*       IMPLICIT NONE */

 /*       CHARACTER          VECT, UPLO */
 /*       INTEGER            N, LDA, LWORK, LHOUS2, INFO */
 /*       DOUBLE PRECISION   D( * ), E( * ) */
 /*       COMPLEX*16         A( LDA, * ), TAU( * ), */
 /*                          HOUS2( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHETRD_2STAGE reduces a complex Hermitian matrix A to real symmetric */
 /* > tridiagonal form T by a unitary similarity transformation: */
 /* > Q1**H Q2**H* A * Q2 * Q1 = T. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] VECT */
 /* > \verbatim */
 /* >          VECT is CHARACTER*1 */
 /* >          = 'N':  No need for the Housholder representation, */
 /* >                  in particular for the second stage (Band to */
 /* >                  tridiagonal) and thus LHOUS2 is of size f2cmax(1, 4*N); */
 /* >          = 'V':  the Householder representation is needed to */
 /* >                  either generate Q1 Q2 or to apply Q1 Q2, */
 /* >                  then LHOUS2 is to be queried and computed. */
 /* >                  (NOT AVAILABLE IN THIS RELEASE). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          N-by-N upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading N-by-N lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* >          On exit, if UPLO = 'U', the band superdiagonal */
 /* >          of A are overwritten by the corresponding elements of the */
 /* >          internal band-diagonal matrix AB, and the elements above */
 /* >          the KD superdiagonal, with the array TAU, represent the unitary */
 /* >          matrix Q1 as a product of elementary reflectors; if UPLO */
 /* >          = 'L', the diagonal and band subdiagonal of A are over- */
 /* >          written by the corresponding elements of the internal band-diagonal */
 /* >          matrix AB, and the elements below the KD subdiagonal, with */
 /* >          the array TAU, represent the unitary matrix Q1 as a product */
 /* >          of elementary reflectors. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] D */
 /* > \verbatim */
 /* >          D is DOUBLE PRECISION array, dimension (N) */
 /* >          The diagonal elements of the tridiagonal matrix T. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] E */
 /* > \verbatim */
 /* >          E is DOUBLE PRECISION array, dimension (N-1) */
 /* >          The off-diagonal elements of the tridiagonal matrix T. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is COMPLEX*16 array, dimension (N-KD) */
 /* >          The scalar factors of the elementary reflectors of */
 /* >          the first stage (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] HOUS2 */
 /* > \verbatim */
 /* >          HOUS2 is COMPLEX*16 array, dimension (LHOUS2) */
 /* >          Stores the Householder representation of the stage2 */
 /* >          band to tridiagonal. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LHOUS2 */
 /* > \verbatim */
 /* >          LHOUS2 is INTEGER */
 /* >          The dimension of the array HOUS2. */
 /* >          If LWORK = -1, or LHOUS2 = -1, */
 /* >          then a query is assumed; the routine */
 /* >          only calculates the optimal size of the HOUS2 array, returns */
 /* >          this value as the first entry of the HOUS2 array, and no error */
 /* >          message related to LHOUS2 is issued by XERBLA. */
 /* >          If VECT='N', LHOUS2 = f2cmax(1, 4*n); */
 /* >          if VECT='V', option not yet available. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (LWORK) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. LWORK = MAX(1, dimension) */
 /* >          If LWORK = -1, or LHOUS2=-1, */
 /* >          then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* >          LWORK = MAX(1, dimension) where */
 /* >          dimension   = f2cmax(stage1,stage2) + (KD+1)*N */
 /* >                      = N*KD + N*f2cmax(KD+1,FACTOPTNB) */
 /* >                        + f2cmax(2*KD*KD, KD*NTHREADS) */
 /* >                        + (KD+1)*N */
 /* >          where KD is the blocking size of the reduction, */
 /* >          FACTOPTNB is the blocking used by the QR or LQ */
 /* >          algorithm, usually FACTOPTNB=128 is a good choice */
 /* >          NTHREADS is the number of threads used when */
 /* >          openMP compilation is enabled, otherwise =1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complex16HEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  Implemented by Azzam Haidar. */
 /* > */
 /* >  All details are available on technical report, SC11, SC13 papers. */
 /* > */
 /* >  Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */
 /* >  Parallel reduction to condensed forms for symmetric eigenvalue problems */
 /* >  using aggregated fine-grained and memory-aware kernels. In Proceedings */
 /* >  of 2011 International Conference for High Performance Computing, */
 /* >  Networking, Storage and Analysis (SC '11), New York, NY, USA, */
 /* >  Article 8 , 11 pages. */
 /* >  http://doi.acm.org/10.1145/2063384.2063394 */
 /* > */
 /* >  A. Haidar, J. Kurzak, P. Luszczek, 2013. */
 /* >  An improved parallel singular value algorithm and its implementation */
 /* >  for multicore hardware, In Proceedings of 2013 International Conference */
 /* >  for High Performance Computing, Networking, Storage and Analysis (SC '13). */
 /* >  Denver, Colorado, USA, 2013. */
 /* >  Article 90, 12 pages. */
 /* >  http://doi.acm.org/10.1145/2503210.2503292 */
 /* > */
 /* >  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */
 /* >  A novel hybrid CPU-GPU generalized eigensolver for electronic structure */
 /* >  calculations based on fine-grained memory aware tasks. */
 /* >  International Journal of High Performance Computing Applications. */
 /* >  Volume 28 Issue 2, Pages 196-209, May 2014. */
 /* >  http://hpc.sagepub.com/content/28/2/196 */
 /* > */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zhetrd_2stage_(char *vect, char *uplo, integer *n, 
 	doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, 
 	doublecomplex *tau, doublecomplex *hous2, integer *lhous2, 
 	doublecomplex *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1;

    /* Local variables */
    integer ldab;
    extern /* Subroutine */ int zhetrd_he2hb_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *, 
 	    doublecomplex *, doublecomplex *, integer *, integer *);
    extern integer ilaenv2stage_(integer *, char *, char *, integer *, 
 	    integer *, integer *, integer *);
    extern /* Subroutine */ int zhetrd_hb2st_(char *, char *, char *, 
 	    integer *, integer *, doublecomplex *, integer *, doublereal *, 
 	    doublereal *, doublecomplex *, integer *, doublecomplex *, 
 	    integer *, integer *);
    integer lwrk, wpos;
    extern logical lsame_(char *, char *);
    integer abpos, lhmin, lwmin;
    logical wantq, upper;
    integer ib, kd;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    logical lquery;



 /*  -- LAPACK computational routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */

 /*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --d__;
    --e;
    --tau;
    --hous2;
    --work;

    /* Function Body */
    *info = 0;
    wantq = lsame_(vect, "V");
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1 || *lhous2 == -1;

 /*     Determine the block size, the workspace size and the hous size. */

    kd = ilaenv2stage_(&c__1, "ZHETRD_2STAGE", vect, n, &c_n1, &c_n1, &c_n1);
    ib = ilaenv2stage_(&c__2, "ZHETRD_2STAGE", vect, n, &kd, &c_n1, &c_n1);
    lhmin = ilaenv2stage_(&c__3, "ZHETRD_2STAGE", vect, n, &kd, &ib, &c_n1);
    lwmin = ilaenv2stage_(&c__4, "ZHETRD_2STAGE", vect, n, &kd, &ib, &c_n1);
 /*      WRITE(*,*),'ZHETRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO, */
 /*     $            LHMIN, LWMIN */

    if (! lsame_(vect, "N")) {
 	*info = -1;
    } else if (! upper && ! lsame_(uplo, "L")) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*lhous2 < lhmin && ! lquery) {
 	*info = -10;
    } else if (*lwork < lwmin && ! lquery) {
 	*info = -12;
    }

    if (*info == 0) {
 	hous2[1].r = (doublereal) lhmin, hous2[1].i = 0.;
 	work[1].r = (doublereal) lwmin, work[1].i = 0.;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHETRD_2STAGE", &i__1, (ftnlen)13);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	work[1].r = 1., work[1].i = 0.;
 	return 0;
    }

 /*     Determine pointer position */

    ldab = kd + 1;
    lwrk = *lwork - ldab * *n;
    abpos = 1;
    wpos = abpos + ldab * *n;
    zhetrd_he2hb_(uplo, n, &kd, &a[a_offset], lda, &work[abpos], &ldab, &tau[
 	    1], &work[wpos], &lwrk, info);
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHETRD_HE2HB", &i__1, (ftnlen)12);
 	return 0;
    }
    zhetrd_hb2st_("Y", vect, uplo, n, &kd, &work[abpos], &ldab, &d__[1], &e[
 	    1], &hous2[1], lhous2, &work[wpos], &lwrk, info);
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHETRD_HB2ST", &i__1, (ftnlen)12);
 	return 0;
    }


    hous2[1].r = (doublereal) lhmin, hous2[1].i = 0.;
    work[1].r = (doublereal) lwmin, work[1].i = 0.;
    return 0;

 /*     End of ZHETRD_2STAGE */

 } /* zhetrd_2stage__ */

--- a/lapack-netlib/SRC/zhetrd_hb2st.c
+++ b/lapack-netlib/SRC/zhetrd_hb2st.c
--- a/lapack-netlib/SRC/zhetrd_he2hb.c
+++ b/lapack-netlib/SRC/zhetrd_he2hb.c
@@ -0,0 +1,966 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static doublecomplex c_b1 = {0.,0.};
 static doublecomplex c_b2 = {1.,0.};
 static integer c__4 = 4;
 static integer c_n1 = -1;
 static integer c__1 = 1;
 static doublereal c_b33 = 1.;

 /* > \brief \b ZHETRD_HE2HB */

 /*  @precisions fortran z -> s d c */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download ZHETRD_HE2HB + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, */
 /*                              WORK, LWORK, INFO ) */

 /*       IMPLICIT NONE */

 /*       CHARACTER          UPLO */
 /*       INTEGER            INFO, LDA, LDAB, LWORK, N, KD */
 /*       COMPLEX*16         A( LDA, * ), AB( LDAB, * ), */
 /*                          TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > ZHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian */
 /* > band-diagonal form AB by a unitary similarity transformation: */
 /* > Q**H * A * Q = AB. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  Upper triangle of A is stored; */
 /* >          = 'L':  Lower triangle of A is stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KD */
 /* > \verbatim */
 /* >          KD is INTEGER */
 /* >          The number of superdiagonals of the reduced matrix if UPLO = 'U', */
 /* >          or the number of subdiagonals if UPLO = 'L'.  KD >= 0. */
 /* >          The reduced matrix is stored in the array AB. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is COMPLEX*16 array, dimension (LDA,N) */
 /* >          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
 /* >          N-by-N upper triangular part of A contains the upper */
 /* >          triangular part of the matrix A, and the strictly lower */
 /* >          triangular part of A is not referenced.  If UPLO = 'L', the */
 /* >          leading N-by-N lower triangular part of A contains the lower */
 /* >          triangular part of the matrix A, and the strictly upper */
 /* >          triangular part of A is not referenced. */
 /* >          On exit, if UPLO = 'U', the diagonal and first superdiagonal */
 /* >          of A are overwritten by the corresponding elements of the */
 /* >          tridiagonal matrix T, and the elements above the first */
 /* >          superdiagonal, with the array TAU, represent the unitary */
 /* >          matrix Q as a product of elementary reflectors; if UPLO */
 /* >          = 'L', the diagonal and first subdiagonal of A are over- */
 /* >          written by the corresponding elements of the tridiagonal */
 /* >          matrix T, and the elements below the first subdiagonal, with */
 /* >          the array TAU, represent the unitary matrix Q as a product */
 /* >          of elementary reflectors. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] AB */
 /* > \verbatim */
 /* >          AB is COMPLEX*16 array, dimension (LDAB,N) */
 /* >          On exit, the upper or lower triangle of the Hermitian band */
 /* >          matrix A, stored in the first KD+1 rows of the array.  The */
 /* >          j-th column of A is stored in the j-th column of the array AB */
 /* >          as follows: */
 /* >          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for f2cmax(1,j-kd)<=i<=j; */
 /* >          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=f2cmin(n,j+kd). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KD+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is COMPLEX*16 array, dimension (N-KD) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is COMPLEX*16 array, dimension (LWORK) */
 /* >          On exit, if INFO = 0, or if LWORK=-1, */
 /* >          WORK(1) returns the size of LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK which should be calculated */
 /* >          by a workspace query. LWORK = MAX(1, LWORK_QUERY) */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* >          LWORK_QUERY = N*KD + N*f2cmax(KD,FACTOPTNB) + 2*KD*KD */
 /* >          where FACTOPTNB is the blocking used by the QR or LQ */
 /* >          algorithm, usually FACTOPTNB=128 is a good choice otherwise */
 /* >          putting LWORK=-1 will provide the size of WORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup complex16HEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  Implemented by Azzam Haidar. */
 /* > */
 /* >  All details are available on technical report, SC11, SC13 papers. */
 /* > */
 /* >  Azzam Haidar, Hatem Ltaief, and Jack Dongarra. */
 /* >  Parallel reduction to condensed forms for symmetric eigenvalue problems */
 /* >  using aggregated fine-grained and memory-aware kernels. In Proceedings */
 /* >  of 2011 International Conference for High Performance Computing, */
 /* >  Networking, Storage and Analysis (SC '11), New York, NY, USA, */
 /* >  Article 8 , 11 pages. */
 /* >  http://doi.acm.org/10.1145/2063384.2063394 */
 /* > */
 /* >  A. Haidar, J. Kurzak, P. Luszczek, 2013. */
 /* >  An improved parallel singular value algorithm and its implementation */
 /* >  for multicore hardware, In Proceedings of 2013 International Conference */
 /* >  for High Performance Computing, Networking, Storage and Analysis (SC '13). */
 /* >  Denver, Colorado, USA, 2013. */
 /* >  Article 90, 12 pages. */
 /* >  http://doi.acm.org/10.1145/2503210.2503292 */
 /* > */
 /* >  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. */
 /* >  A novel hybrid CPU-GPU generalized eigensolver for electronic structure */
 /* >  calculations based on fine-grained memory aware tasks. */
 /* >  International Journal of High Performance Computing Applications. */
 /* >  Volume 28 Issue 2, Pages 196-209, May 2014. */
 /* >  http://hpc.sagepub.com/content/28/2/196 */
 /* > */
 /* > \endverbatim */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  If UPLO = 'U', the matrix Q is represented as a product of elementary */
 /* >  reflectors */
 /* > */
 /* >     Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd. */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**H */
 /* > */
 /* >  where tau is a complex scalar, and v is a complex vector with */
 /* >  v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in */
 /* >  A(i,i+kd+1:n), and tau in TAU(i). */
 /* > */
 /* >  If UPLO = 'L', the matrix Q is represented as a product of elementary */
 /* >  reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = n-kd. */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**H */
 /* > */
 /* >  where tau is a complex scalar, and v is a complex vector with */
 /* >  v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in */
 /* >  A(i+kd+2:n,i), and tau in TAU(i). */
 /* > */
 /* >  The contents of A on exit are illustrated by the following examples */
 /* >  with n = 5: */
 /* > */
 /* >  if UPLO = 'U':                       if UPLO = 'L': */
 /* > */
 /* >    (  ab  ab/v1  v1      v1     v1    )              (  ab                            ) */
 /* >    (      ab     ab/v2   v2     v2    )              (  ab/v1  ab                     ) */
 /* >    (             ab      ab/v3  v3    )              (  v1     ab/v2  ab              ) */
 /* >    (                     ab     ab/v4 )              (  v1     v2     ab/v3  ab       ) */
 /* >    (                            ab    )              (  v1     v2     v3     ab/v4 ab ) */
 /* > */
 /* >  where d and e denote diagonal and off-diagonal elements of T, and vi */
 /* >  denotes an element of the vector defining H(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int zhetrd_he2hb_(char *uplo, integer *n, integer *kd, 
 	doublecomplex *a, integer *lda, doublecomplex *ab, integer *ldab, 
 	doublecomplex *tau, doublecomplex *work, integer *lwork, integer *
 	info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, ab_dim1, ab_offset, i__1, i__2, i__3, i__4, 
 	    i__5;
    doublecomplex z__1;

    /* Local variables */
    extern integer ilaenv2stage_(integer *, char *, char *, integer *, 
 	    integer *, integer *, integer *);
    integer tpos, wpos, s1pos, s2pos, i__, j;
    extern logical lsame_(char *, char *);
    integer iinfo;
    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
 	    integer *), zhemm_(char *, char *, integer *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
 	    integer *);
    integer lwmin;
    logical upper;
    extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *), zher2k_(char *, char *, integer *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublereal *, doublecomplex *, 
 	    integer *);
    integer lk, pk, pn, lt, lw;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), zgelqf_(
 	    integer *, integer *, doublecomplex *, integer *, doublecomplex *,
 	     doublecomplex *, integer *, integer *), zgeqrf_(integer *, 
 	    integer *, doublecomplex *, integer *, doublecomplex *, 
 	    doublecomplex *, integer *, integer *), zlarft_(char *, char *, 
 	    integer *, integer *, doublecomplex *, integer *, doublecomplex *,
 	     doublecomplex *, integer *), zlaset_(char *, 
 	    integer *, integer *, doublecomplex *, doublecomplex *, 
 	    doublecomplex *, integer *);
    integer ls1;
    logical lquery;
    integer ls2, ldt, ldw, lds1, lds2;



 /*  -- LAPACK computational routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


 /*     Determine the minimal workspace size required */
 /*     and test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1;
    lwmin = ilaenv2stage_(&c__4, "ZHETRD_HE2HB", "", n, kd, &c_n1, &c_n1);
    if (! upper && ! lsame_(uplo, "L")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*kd < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = 1, i__2 = *kd + 1;
 	if (*ldab < f2cmax(i__1,i__2)) {
 	    *info = -7;
 	} else if (*lwork < lwmin && ! lquery) {
 	    *info = -10;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("ZHETRD_HE2HB", &i__1, (ftnlen)12);
 	return 0;
    } else if (lquery) {
 	work[1].r = (doublereal) lwmin, work[1].i = 0.;
 	return 0;
    }

 /*     Quick return if possible */
 /*     Copy the upper/lower portion of A into AB */

    if (*n <= *kd + 1) {
 	if (upper) {
 	    i__1 = *n;
 	    for (i__ = 1; i__ <= i__1; ++i__) {
 /* Computing MIN */
 		i__2 = *kd + 1;
 		lk = f2cmin(i__2,i__);
 		zcopy_(&lk, &a[i__ - lk + 1 + i__ * a_dim1], &c__1, &ab[*kd + 
 			1 - lk + 1 + i__ * ab_dim1], &c__1);
 /* L100: */
 	    }
 	} else {
 	    i__1 = *n;
 	    for (i__ = 1; i__ <= i__1; ++i__) {
 /* Computing MIN */
 		i__2 = *kd + 1, i__3 = *n - i__ + 1;
 		lk = f2cmin(i__2,i__3);
 		zcopy_(&lk, &a[i__ + i__ * a_dim1], &c__1, &ab[i__ * ab_dim1 
 			+ 1], &c__1);
 /* L110: */
 	    }
 	}
 	work[1].r = 1., work[1].i = 0.;
 	return 0;
    }

 /*     Determine the pointer position for the workspace */

    ldt = *kd;
    lds1 = *kd;
    lt = ldt * *kd;
    lw = *n * *kd;
    ls1 = lds1 * *kd;
    ls2 = lwmin - lt - lw - ls1;
 /*      LS2 = N*MAX(KD,FACTOPTNB) */
    tpos = 1;
    wpos = tpos + lt;
    s1pos = wpos + lw;
    s2pos = s1pos + ls1;
    if (upper) {
 	ldw = *kd;
 	lds2 = *kd;
    } else {
 	ldw = *n;
 	lds2 = *n;
    }


 /*     Set the workspace of the triangular matrix T to zero once such a */
 /*     way every time T is generated the upper/lower portion will be always zero */

    zlaset_("A", &ldt, kd, &c_b1, &c_b1, &work[tpos], &ldt);

    if (upper) {
 	i__1 = *n - *kd;
 	i__2 = *kd;
 	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
 	    pn = *n - i__ - *kd + 1;
 /* Computing MIN */
 	    i__3 = *n - i__ - *kd + 1;
 	    pk = f2cmin(i__3,*kd);

 /*            Compute the LQ factorization of the current block */

 	    zgelqf_(kd, &pn, &a[i__ + (i__ + *kd) * a_dim1], lda, &tau[i__], &
 		    work[s2pos], &ls2, &iinfo);

 /*            Copy the upper portion of A into AB */

 	    i__3 = i__ + pk - 1;
 	    for (j = i__; j <= i__3; ++j) {
 /* Computing MIN */
 		i__4 = *kd, i__5 = *n - j;
 		lk = f2cmin(i__4,i__5) + 1;
 		i__4 = *ldab - 1;
 		zcopy_(&lk, &a[j + j * a_dim1], lda, &ab[*kd + 1 + j * 
 			ab_dim1], &i__4);
 /* L20: */
 	    }

 	    zlaset_("Lower", &pk, &pk, &c_b1, &c_b2, &a[i__ + (i__ + *kd) * 
 		    a_dim1], lda);

 /*            Form the matrix T */

 	    zlarft_("Forward", "Rowwise", &pn, &pk, &a[i__ + (i__ + *kd) * 
 		    a_dim1], lda, &tau[i__], &work[tpos], &ldt);

 /*            Compute W: */

 	    zgemm_("Conjugate", "No transpose", &pk, &pn, &pk, &c_b2, &work[
 		    tpos], &ldt, &a[i__ + (i__ + *kd) * a_dim1], lda, &c_b1, &
 		    work[s2pos], &lds2);

 	    zhemm_("Right", uplo, &pk, &pn, &c_b2, &a[i__ + *kd + (i__ + *kd) 
 		    * a_dim1], lda, &work[s2pos], &lds2, &c_b1, &work[wpos], &
 		    ldw);

 	    zgemm_("No transpose", "Conjugate", &pk, &pk, &pn, &c_b2, &work[
 		    wpos], &ldw, &work[s2pos], &lds2, &c_b1, &work[s1pos], &
 		    lds1);

 	    z__1.r = -.5, z__1.i = 0.;
 	    zgemm_("No transpose", "No transpose", &pk, &pn, &pk, &z__1, &
 		    work[s1pos], &lds1, &a[i__ + (i__ + *kd) * a_dim1], lda, &
 		    c_b2, &work[wpos], &ldw);


 /*            Update the unreduced submatrix A(i+kd:n,i+kd:n), using */
 /*            an update of the form:  A := A - V'*W - W'*V */

 	    z__1.r = -1., z__1.i = 0.;
 	    zher2k_(uplo, "Conjugate", &pn, &pk, &z__1, &a[i__ + (i__ + *kd) *
 		     a_dim1], lda, &work[wpos], &ldw, &c_b33, &a[i__ + *kd + (
 		    i__ + *kd) * a_dim1], lda);
 /* L10: */
 	}

 /*        Copy the upper band to AB which is the band storage matrix */

 	i__2 = *n;
 	for (j = *n - *kd + 1; j <= i__2; ++j) {
 /* Computing MIN */
 	    i__1 = *kd, i__3 = *n - j;
 	    lk = f2cmin(i__1,i__3) + 1;
 	    i__1 = *ldab - 1;
 	    zcopy_(&lk, &a[j + j * a_dim1], lda, &ab[*kd + 1 + j * ab_dim1], &
 		    i__1);
 /* L30: */
 	}

    } else {

 /*         Reduce the lower triangle of A to lower band matrix */

 	i__2 = *n - *kd;
 	i__1 = *kd;
 	for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
 	    pn = *n - i__ - *kd + 1;
 /* Computing MIN */
 	    i__3 = *n - i__ - *kd + 1;
 	    pk = f2cmin(i__3,*kd);

 /*            Compute the QR factorization of the current block */

 	    zgeqrf_(&pn, kd, &a[i__ + *kd + i__ * a_dim1], lda, &tau[i__], &
 		    work[s2pos], &ls2, &iinfo);

 /*            Copy the upper portion of A into AB */

 	    i__3 = i__ + pk - 1;
 	    for (j = i__; j <= i__3; ++j) {
 /* Computing MIN */
 		i__4 = *kd, i__5 = *n - j;
 		lk = f2cmin(i__4,i__5) + 1;
 		zcopy_(&lk, &a[j + j * a_dim1], &c__1, &ab[j * ab_dim1 + 1], &
 			c__1);
 /* L50: */
 	    }

 	    zlaset_("Upper", &pk, &pk, &c_b1, &c_b2, &a[i__ + *kd + i__ * 
 		    a_dim1], lda);

 /*            Form the matrix T */

 	    zlarft_("Forward", "Columnwise", &pn, &pk, &a[i__ + *kd + i__ * 
 		    a_dim1], lda, &tau[i__], &work[tpos], &ldt);

 /*            Compute W: */

 	    zgemm_("No transpose", "No transpose", &pn, &pk, &pk, &c_b2, &a[
 		    i__ + *kd + i__ * a_dim1], lda, &work[tpos], &ldt, &c_b1, 
 		    &work[s2pos], &lds2);

 	    zhemm_("Left", uplo, &pn, &pk, &c_b2, &a[i__ + *kd + (i__ + *kd) *
 		     a_dim1], lda, &work[s2pos], &lds2, &c_b1, &work[wpos], &
 		    ldw);

 	    zgemm_("Conjugate", "No transpose", &pk, &pk, &pn, &c_b2, &work[
 		    s2pos], &lds2, &work[wpos], &ldw, &c_b1, &work[s1pos], &
 		    lds1);

 	    z__1.r = -.5, z__1.i = 0.;
 	    zgemm_("No transpose", "No transpose", &pn, &pk, &pk, &z__1, &a[
 		    i__ + *kd + i__ * a_dim1], lda, &work[s1pos], &lds1, &
 		    c_b2, &work[wpos], &ldw);


 /*            Update the unreduced submatrix A(i+kd:n,i+kd:n), using */
 /*            an update of the form:  A := A - V*W' - W*V' */

 	    z__1.r = -1., z__1.i = 0.;
 	    zher2k_(uplo, "No transpose", &pn, &pk, &z__1, &a[i__ + *kd + i__ 
 		    * a_dim1], lda, &work[wpos], &ldw, &c_b33, &a[i__ + *kd + 
 		    (i__ + *kd) * a_dim1], lda);
 /*            ================================================================== */
 /*            RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED */
 /*             DO 45 J = I, I+PK-1 */
 /*                LK = MIN( KD, N-J ) + 1 */
 /*                CALL ZCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 ) */
 /*   45        CONTINUE */
 /*            ================================================================== */
 /* L40: */
 	}

 /*        Copy the lower band to AB which is the band storage matrix */

 	i__1 = *n;
 	for (j = *n - *kd + 1; j <= i__1; ++j) {
 /* Computing MIN */
 	    i__2 = *kd, i__3 = *n - j;
 	    lk = f2cmin(i__2,i__3) + 1;
 	    zcopy_(&lk, &a[j + j * a_dim1], &c__1, &ab[j * ab_dim1 + 1], &
 		    c__1);
 /* L60: */
 	}
    }

    work[1].r = (doublereal) lwmin, work[1].i = 0.;
    return 0;

 /*     End of ZHETRD_HE2HB */

 } /* zhetrd_he2hb__ */