Add C versions as fallback

4 years ago · 1ed9c0071a
--- a/lapack-netlib/SRC/dbbcsd.c
+++ b/lapack-netlib/SRC/dbbcsd.c
--- a/lapack-netlib/SRC/dbdsdc.c
+++ b/lapack-netlib/SRC/dbdsdc.c
@@ -0,0 +1,969 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__9 = 9;
 static integer c__0 = 0;
 static doublereal c_b15 = 1.;
 static integer c__1 = 1;
 static doublereal c_b29 = 0.;
 /* > \brief \b DBDSDC */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DBDSDC + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dbdsdc.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dbdsdc.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dbdsdc.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DBDSDC( UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, */
 /*                          WORK, IWORK, INFO ) */
 /*       CHARACTER          COMPQ, UPLO */
 /*       INTEGER            INFO, LDU, LDVT, N */
 /*       INTEGER            IQ( * ), IWORK( * ) */
 /*       DOUBLE PRECISION   D( * ), E( * ), Q( * ), U( LDU, * ), */
 /*      $                   VT( LDVT, * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DBDSDC computes the singular value decomposition (SVD) of a real */
 /* > N-by-N (upper or lower) bidiagonal matrix B:  B = U * S * VT, */
 /* > using a divide and conquer method, where S is a diagonal matrix */
 /* > with non-negative diagonal elements (the singular values of B), and */
 /* > U and VT are orthogonal matrices of left and right singular vectors, */
 /* > respectively. DBDSDC can be used to compute all singular values, */
 /* > and optionally, singular vectors or singular vectors in compact form. */
 /* > */
 /* > This code makes very mild assumptions about floating point */
 /* > arithmetic. It will work on machines with a guard digit in */
 /* > add/subtract, or on those binary machines without guard digits */
 /* > which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */
 /* > It could conceivably fail on hexadecimal or decimal machines */
 /* > without guard digits, but we know of none.  See DLASD3 for details. */
 /* > */
 /* > The code currently calls DLASDQ if singular values only are desired. */
 /* > However, it can be slightly modified to compute singular values */
 /* > using the divide and conquer method. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] UPLO */
 /* > \verbatim */
 /* >          UPLO is CHARACTER*1 */
 /* >          = 'U':  B is upper bidiagonal. */
 /* >          = 'L':  B is lower bidiagonal. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] COMPQ */
 /* > \verbatim */
 /* >          COMPQ is CHARACTER*1 */
 /* >          Specifies whether singular vectors are to be computed */
 /* >          as follows: */
 /* >          = 'N':  Compute singular values only; */
 /* >          = 'P':  Compute singular values and compute singular */
 /* >                  vectors in compact form; */
 /* >          = 'I':  Compute singular values and singular vectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] D */
 /* > \verbatim */
 /* >          D is DOUBLE PRECISION array, dimension (N) */
 /* >          On entry, the n diagonal elements of the bidiagonal matrix B. */
 /* >          On exit, if INFO=0, the singular values of B. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] E */
 /* > \verbatim */
 /* >          E is DOUBLE PRECISION array, dimension (N-1) */
 /* >          On entry, the elements of E contain the offdiagonal */
 /* >          elements of the bidiagonal matrix whose SVD is desired. */
 /* >          On exit, E has been destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] U */
 /* > \verbatim */
 /* >          U is DOUBLE PRECISION array, dimension (LDU,N) */
 /* >          If  COMPQ = 'I', then: */
 /* >             On exit, if INFO = 0, U contains the left singular vectors */
 /* >             of the bidiagonal matrix. */
 /* >          For other values of COMPQ, U is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDU */
 /* > \verbatim */
 /* >          LDU is INTEGER */
 /* >          The leading dimension of the array U.  LDU >= 1. */
 /* >          If singular vectors are desired, then LDU >= f2cmax( 1, N ). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] VT */
 /* > \verbatim */
 /* >          VT is DOUBLE PRECISION array, dimension (LDVT,N) */
 /* >          If  COMPQ = 'I', then: */
 /* >             On exit, if INFO = 0, VT**T contains the right singular */
 /* >             vectors of the bidiagonal matrix. */
 /* >          For other values of COMPQ, VT is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDVT */
 /* > \verbatim */
 /* >          LDVT is INTEGER */
 /* >          The leading dimension of the array VT.  LDVT >= 1. */
 /* >          If singular vectors are desired, then LDVT >= f2cmax( 1, N ). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Q */
 /* > \verbatim */
 /* >          Q is DOUBLE PRECISION array, dimension (LDQ) */
 /* >          If  COMPQ = 'P', then: */
 /* >             On exit, if INFO = 0, Q and IQ contain the left */
 /* >             and right singular vectors in a compact form, */
 /* >             requiring O(N log N) space instead of 2*N**2. */
 /* >             In particular, Q contains all the DOUBLE PRECISION data in */
 /* >             LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) */
 /* >             words of memory, where SMLSIZ is returned by ILAENV and */
 /* >             is equal to the maximum size of the subproblems at the */
 /* >             bottom of the computation tree (usually about 25). */
 /* >          For other values of COMPQ, Q is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IQ */
 /* > \verbatim */
 /* >          IQ is INTEGER array, dimension (LDIQ) */
 /* >          If  COMPQ = 'P', then: */
 /* >             On exit, if INFO = 0, Q and IQ contain the left */
 /* >             and right singular vectors in a compact form, */
 /* >             requiring O(N log N) space instead of 2*N**2. */
 /* >             In particular, IQ contains all INTEGER data in */
 /* >             LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) */
 /* >             words of memory, where SMLSIZ is returned by ILAENV and */
 /* >             is equal to the maximum size of the subproblems at the */
 /* >             bottom of the computation tree (usually about 25). */
 /* >          For other values of COMPQ, IQ is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
 /* >          If COMPQ = 'N' then LWORK >= (4 * N). */
 /* >          If COMPQ = 'P' then LWORK >= (6 * N). */
 /* >          If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (8*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          > 0:  The algorithm failed to compute a singular value. */
 /* >                The update process of divide and conquer failed. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date June 2016 */
 /* > \ingroup auxOTHERcomputational */
 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Ming Gu and Huan Ren, Computer Science Division, University of */
 /* >     California at Berkeley, USA */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dbdsdc_(char *uplo, char *compq, integer *n, doublereal *
 	d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, 
 	integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer *
 	iwork, integer *info)
 {
    /* System generated locals */
    integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2;
    doublereal d__1;
    /* Local variables */
    integer difl, difr, ierr, perm, mlvl, sqre, i__, j, k;
    doublereal p, r__;
    integer z__;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, 
 	    integer *, doublereal *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer *
 	    , doublereal *, integer *), dswap_(integer *, doublereal *, 
 	    integer *, doublereal *, integer *);
    integer poles, iuplo, nsize, start;
    extern /* Subroutine */ int dlasd0_(integer *, integer *, doublereal *, 
 	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
 	    integer *, integer *, doublereal *, integer *);
    integer ic, ii, kk;
    doublereal cs;
    extern doublereal dlamch_(char *);
    extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, 
 	    integer *, doublereal *, doublereal *, doublereal *, integer *, 
 	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
 	     doublereal *, integer *, integer *, integer *, integer *, 
 	    doublereal *, doublereal *, doublereal *, doublereal *, integer *,
 	     integer *);
    integer is, iu;
    doublereal sn;
    extern /* Subroutine */ int dlascl_(char *, integer *, integer *, 
 	    doublereal *, doublereal *, integer *, integer *, doublereal *, 
 	    integer *, integer *), dlasdq_(char *, integer *, integer 
 	    *, integer *, integer *, integer *, doublereal *, doublereal *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *, doublereal *, integer *), dlaset_(char *, 
 	    integer *, integer *, doublereal *, doublereal *, doublereal *, 
 	    integer *), dlartg_(doublereal *, doublereal *, 
 	    doublereal *, doublereal *, doublereal *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    integer givcol;
    extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *);
    integer icompq;
    doublereal orgnrm;
    integer givnum, givptr, nm1, qstart, smlsiz, wstart, smlszp;
    doublereal eps;
    integer ivt;
 /*  -- LAPACK computational routine (version 3.7.1) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2016 */
 /*  ===================================================================== */
 /*  Changed dimension statement in comment describing E from (N) to */
 /*  (N-1).  Sven, 17 Feb 05. */
 /*  ===================================================================== */
 /*     Test the input parameters. */
    /* Parameter adjustments */
    --d__;
    --e;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1 * 1;
    vt -= vt_offset;
    --q;
    --iq;
    --work;
    --iwork;
    /* Function Body */
    *info = 0;
    iuplo = 0;
    if (lsame_(uplo, "U")) {
 	iuplo = 1;
    }
    if (lsame_(uplo, "L")) {
 	iuplo = 2;
    }
    if (lsame_(compq, "N")) {
 	icompq = 0;
    } else if (lsame_(compq, "P")) {
 	icompq = 1;
    } else if (lsame_(compq, "I")) {
 	icompq = 2;
    } else {
 	icompq = -1;
    }
    if (iuplo == 0) {
 	*info = -1;
    } else if (icompq < 0) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*ldu < 1 || icompq == 2 && *ldu < *n) {
 	*info = -7;
    } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) {
 	*info = -9;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DBDSDC", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Quick return if possible */
    if (*n == 0) {
 	return 0;
    }
    smlsiz = ilaenv_(&c__9, "DBDSDC", " ", &c__0, &c__0, &c__0, &c__0, (
 	    ftnlen)6, (ftnlen)1);
    if (*n == 1) {
 	if (icompq == 1) {
 	    q[1] = d_sign(&c_b15, &d__[1]);
 	    q[smlsiz * *n + 1] = 1.;
 	} else if (icompq == 2) {
 	    u[u_dim1 + 1] = d_sign(&c_b15, &d__[1]);
 	    vt[vt_dim1 + 1] = 1.;
 	}
 	d__[1] = abs(d__[1]);
 	return 0;
    }
    nm1 = *n - 1;
 /*     If matrix lower bidiagonal, rotate to be upper bidiagonal */
 /*     by applying Givens rotations on the left */
    wstart = 1;
    qstart = 3;
    if (icompq == 1) {
 	dcopy_(n, &d__[1], &c__1, &q[1], &c__1);
 	i__1 = *n - 1;
 	dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1);
    }
    if (iuplo == 2) {
 	qstart = 5;
 	if (icompq == 2) {
 	    wstart = (*n << 1) - 1;
 	}
 	i__1 = *n - 1;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
 	    d__[i__] = r__;
 	    e[i__] = sn * d__[i__ + 1];
 	    d__[i__ + 1] = cs * d__[i__ + 1];
 	    if (icompq == 1) {
 		q[i__ + (*n << 1)] = cs;
 		q[i__ + *n * 3] = sn;
 	    } else if (icompq == 2) {
 		work[i__] = cs;
 		work[nm1 + i__] = -sn;
 	    }
 /* L10: */
 	}
    }
 /*     If ICOMPQ = 0, use DLASDQ to compute the singular values. */
    if (icompq == 0) {
 /*        Ignore WSTART, instead using WORK( 1 ), since the two vectors */
 /*        for CS and -SN above are added only if ICOMPQ == 2, */
 /*        and adding them exceeds documented WORK size of 4*n. */
 	dlasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[
 		vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[
 		1], info);
 	goto L40;
    }
 /*     If N is smaller than the minimum divide size SMLSIZ, then solve */
 /*     the problem with another solver. */
    if (*n <= smlsiz) {
 	if (icompq == 2) {
 	    dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu);
 	    dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt);
 	    dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset]
 		    , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[
 		    wstart], info);
 	} else if (icompq == 1) {
 	    iu = 1;
 	    ivt = iu + *n;
 	    dlaset_("A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n);
 	    dlaset_("A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n);
 	    dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + (
 		    qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[
 		    iu + (qstart - 1) * *n], n, &work[wstart], info);
 	}
 	goto L40;
    }
    if (icompq == 2) {
 	dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu);
 	dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt);
    }
 /*     Scale. */
    orgnrm = dlanst_("M", n, &d__[1], &e[1]);
    if (orgnrm == 0.) {
 	return 0;
    }
    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr);
    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, &
 	    ierr);
    eps = dlamch_("Epsilon") * .9;
    mlvl = (integer) (log((doublereal) (*n) / (doublereal) (smlsiz + 1)) / 
 	    log(2.)) + 1;
    smlszp = smlsiz + 1;
    if (icompq == 1) {
 	iu = 1;
 	ivt = smlsiz + 1;
 	difl = ivt + smlszp;
 	difr = difl + mlvl;
 	z__ = difr + (mlvl << 1);
 	ic = z__ + mlvl;
 	is = ic + 1;
 	poles = is + 1;
 	givnum = poles + (mlvl << 1);
 	k = 1;
 	givptr = 2;
 	perm = 3;
 	givcol = perm + mlvl;
    }
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	if ((d__1 = d__[i__], abs(d__1)) < eps) {
 	    d__[i__] = d_sign(&eps, &d__[i__]);
 	}
 /* L20: */
    }
    start = 1;
    sqre = 0;
    i__1 = nm1;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) {
 /*           Subproblem found. First determine its size and then */
 /*           apply divide and conquer on it. */
 	    if (i__ < nm1) {
 /*              A subproblem with E(I) small for I < NM1. */
 		nsize = i__ - start + 1;
 	    } else if ((d__1 = e[i__], abs(d__1)) >= eps) {
 /*              A subproblem with E(NM1) not too small but I = NM1. */
 		nsize = *n - start + 1;
 	    } else {
 /*              A subproblem with E(NM1) small. This implies an */
 /*              1-by-1 subproblem at D(N). Solve this 1-by-1 problem */
 /*              first. */
 		nsize = i__ - start + 1;
 		if (icompq == 2) {
 		    u[*n + *n * u_dim1] = d_sign(&c_b15, &d__[*n]);
 		    vt[*n + *n * vt_dim1] = 1.;
 		} else if (icompq == 1) {
 		    q[*n + (qstart - 1) * *n] = d_sign(&c_b15, &d__[*n]);
 		    q[*n + (smlsiz + qstart - 1) * *n] = 1.;
 		}
 		d__[*n] = (d__1 = d__[*n], abs(d__1));
 	    }
 	    if (icompq == 2) {
 		dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + 
 			start * u_dim1], ldu, &vt[start + start * vt_dim1], 
 			ldvt, &smlsiz, &iwork[1], &work[wstart], info);
 	    } else {
 		dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[
 			start], &q[start + (iu + qstart - 2) * *n], n, &q[
 			start + (ivt + qstart - 2) * *n], &iq[start + k * *n],
 			 &q[start + (difl + qstart - 2) * *n], &q[start + (
 			difr + qstart - 2) * *n], &q[start + (z__ + qstart - 
 			2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[
 			start + givptr * *n], &iq[start + givcol * *n], n, &
 			iq[start + perm * *n], &q[start + (givnum + qstart - 
 			2) * *n], &q[start + (ic + qstart - 2) * *n], &q[
 			start + (is + qstart - 2) * *n], &work[wstart], &
 			iwork[1], info);
 	    }
 	    if (*info != 0) {
 		return 0;
 	    }
 	    start = i__ + 1;
 	}
 /* L30: */
    }
 /*     Unscale */
    dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr);
 L40:
 /*     Use Selection Sort to minimize swaps of singular vectors */
    i__1 = *n;
    for (ii = 2; ii <= i__1; ++ii) {
 	i__ = ii - 1;
 	kk = i__;
 	p = d__[i__];
 	i__2 = *n;
 	for (j = ii; j <= i__2; ++j) {
 	    if (d__[j] > p) {
 		kk = j;
 		p = d__[j];
 	    }
 /* L50: */
 	}
 	if (kk != i__) {
 	    d__[kk] = d__[i__];
 	    d__[i__] = p;
 	    if (icompq == 1) {
 		iq[i__] = kk;
 	    } else if (icompq == 2) {
 		dswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], &
 			c__1);
 		dswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt);
 	    }
 	} else if (icompq == 1) {
 	    iq[i__] = i__;
 	}
 /* L60: */
    }
 /*     If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */
    if (icompq == 1) {
 	if (iuplo == 1) {
 	    iq[*n] = 1;
 	} else {
 	    iq[*n] = 0;
 	}
    }
 /*     If B is lower bidiagonal, update U by those Givens rotations */
 /*     which rotated B to be upper bidiagonal */
    if (iuplo == 2 && icompq == 2) {
 	dlasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu);
    }
    return 0;
 /*     End of DBDSDC */
 } /* dbdsdc_ */
--- a/lapack-netlib/SRC/dbdsqr.c
+++ b/lapack-netlib/SRC/dbdsqr.c
--- a/lapack-netlib/SRC/dbdsvdx.c
+++ b/lapack-netlib/SRC/dbdsvdx.c
--- a/lapack-netlib/SRC/dcombssq.c
+++ b/lapack-netlib/SRC/dcombssq.c
@@ -0,0 +1,486 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b DCOMBSSQ adds two scaled sum of squares quantities. */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DCOMBSSQ( V1, V2 ) */
 /*       DOUBLE PRECISION   V1( 2 ), V2( 2 ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DCOMBSSQ adds two scaled sum of squares quantities, V1 := V1 + V2. */
 /* > That is, */
 /* > */
 /* >    V1_scale**2 * V1_sumsq := V1_scale**2 * V1_sumsq */
 /* >                            + V2_scale**2 * V2_sumsq */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in,out] V1 */
 /* > \verbatim */
 /* >          V1 is DOUBLE PRECISION array, dimension (2). */
 /* >          The first scaled sum. */
 /* >          V1(1) = V1_scale, V1(2) = V1_sumsq. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] V2 */
 /* > \verbatim */
 /* >          V2 is DOUBLE PRECISION array, dimension (2). */
 /* >          The second scaled sum. */
 /* >          V2(1) = V2_scale, V2(2) = V2_sumsq. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date November 2018 */
 /* > \ingroup OTHERauxiliary */
 /*  ===================================================================== */
 /* Subroutine */ int dcombssq_(doublereal *v1, doublereal *v2)
 {
    /* System generated locals */
    doublereal d__1;
 /*  -- LAPACK auxiliary routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2018 */
 /* ===================================================================== */
    /* Parameter adjustments */
    --v2;
    --v1;
    /* Function Body */
    if (v1[1] >= v2[1]) {
 	if (v1[1] != 0.) {
 /* Computing 2nd power */
 	    d__1 = v2[1] / v1[1];
 	    v1[2] += d__1 * d__1 * v2[2];
 	} else {
 	    v1[2] += v2[2];
 	}
    } else {
 /* Computing 2nd power */
 	d__1 = v1[1] / v2[1];
 	v1[2] = v2[2] + d__1 * d__1 * v1[2];
 	v1[1] = v2[1];
    }
    return 0;
 /*     End of DCOMBSSQ */
 } /* dcombssq_ */
--- a/lapack-netlib/SRC/ddisna.c
+++ b/lapack-netlib/SRC/ddisna.c
@@ -0,0 +1,651 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b DDISNA */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DDISNA + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ddisna.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ddisna.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ddisna.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DDISNA( JOB, M, N, D, SEP, INFO ) */
 /*       CHARACTER          JOB */
 /*       INTEGER            INFO, M, N */
 /*       DOUBLE PRECISION   D( * ), SEP( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DDISNA computes the reciprocal condition numbers for the eigenvectors */
 /* > of a real symmetric or complex Hermitian matrix or for the left or */
 /* > right singular vectors of a general m-by-n matrix. The reciprocal */
 /* > condition number is the 'gap' between the corresponding eigenvalue or */
 /* > singular value and the nearest other one. */
 /* > */
 /* > The bound on the error, measured by angle in radians, in the I-th */
 /* > computed vector is given by */
 /* > */
 /* >        DLAMCH( 'E' ) * ( ANORM / SEP( I ) ) */
 /* > */
 /* > where ANORM = 2-norm(A) = f2cmax( abs( D(j) ) ).  SEP(I) is not allowed */
 /* > to be smaller than DLAMCH( 'E' )*ANORM in order to limit the size of */
 /* > the error bound. */
 /* > */
 /* > DDISNA may also be used to compute error bounds for eigenvectors of */
 /* > the generalized symmetric definite eigenproblem. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] JOB */
 /* > \verbatim */
 /* >          JOB is CHARACTER*1 */
 /* >          Specifies for which problem the reciprocal condition numbers */
 /* >          should be computed: */
 /* >          = 'E':  the eigenvectors of a symmetric/Hermitian matrix; */
 /* >          = 'L':  the left singular vectors of a general matrix; */
 /* >          = 'R':  the right singular vectors of a general matrix. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix. M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          If JOB = 'L' or 'R', the number of columns of the matrix, */
 /* >          in which case N >= 0. Ignored if JOB = 'E'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] D */
 /* > \verbatim */
 /* >          D is DOUBLE PRECISION array, dimension (M) if JOB = 'E' */
 /* >                              dimension (f2cmin(M,N)) if JOB = 'L' or 'R' */
 /* >          The eigenvalues (if JOB = 'E') or singular values (if JOB = */
 /* >          'L' or 'R') of the matrix, in either increasing or decreasing */
 /* >          order. If singular values, they must be non-negative. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] SEP */
 /* > \verbatim */
 /* >          SEP is DOUBLE PRECISION array, dimension (M) if JOB = 'E' */
 /* >                               dimension (f2cmin(M,N)) if JOB = 'L' or 'R' */
 /* >          The reciprocal condition numbers of the vectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup auxOTHERcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int ddisna_(char *job, integer *m, integer *n, doublereal *
 	d__, doublereal *sep, integer *info)
 {
    /* System generated locals */
    integer i__1;
    doublereal d__1, d__2, d__3;
    /* Local variables */
    logical decr, left, incr, sing;
    integer i__, k;
    logical eigen;
    extern logical lsame_(char *, char *);
    doublereal anorm;
    logical right;
    extern doublereal dlamch_(char *);
    doublereal oldgap, safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    doublereal newgap, thresh, eps;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    --sep;
    --d__;
    /* Function Body */
    *info = 0;
    eigen = lsame_(job, "E");
    left = lsame_(job, "L");
    right = lsame_(job, "R");
    sing = left || right;
    if (eigen) {
 	k = *m;
    } else if (sing) {
 	k = f2cmin(*m,*n);
    }
    if (! eigen && ! sing) {
 	*info = -1;
    } else if (*m < 0) {
 	*info = -2;
    } else if (k < 0) {
 	*info = -3;
    } else {
 	incr = TRUE_;
 	decr = TRUE_;
 	i__1 = k - 1;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    if (incr) {
 		incr = incr && d__[i__] <= d__[i__ + 1];
 	    }
 	    if (decr) {
 		decr = decr && d__[i__] >= d__[i__ + 1];
 	    }
 /* L10: */
 	}
 	if (sing && k > 0) {
 	    if (incr) {
 		incr = incr && 0. <= d__[1];
 	    }
 	    if (decr) {
 		decr = decr && d__[k] >= 0.;
 	    }
 	}
 	if (! (incr || decr)) {
 	    *info = -4;
 	}
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DDISNA", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Quick return if possible */
    if (k == 0) {
 	return 0;
    }
 /*     Compute reciprocal condition numbers */
    if (k == 1) {
 	sep[1] = dlamch_("O");
    } else {
 	oldgap = (d__1 = d__[2] - d__[1], abs(d__1));
 	sep[1] = oldgap;
 	i__1 = k - 1;
 	for (i__ = 2; i__ <= i__1; ++i__) {
 	    newgap = (d__1 = d__[i__ + 1] - d__[i__], abs(d__1));
 	    sep[i__] = f2cmin(oldgap,newgap);
 	    oldgap = newgap;
 /* L20: */
 	}
 	sep[k] = oldgap;
    }
    if (sing) {
 	if (left && *m > *n || right && *m < *n) {
 	    if (incr) {
 		sep[1] = f2cmin(sep[1],d__[1]);
 	    }
 	    if (decr) {
 /* Computing MIN */
 		d__1 = sep[k], d__2 = d__[k];
 		sep[k] = f2cmin(d__1,d__2);
 	    }
 	}
    }
 /*     Ensure that reciprocal condition numbers are not less than */
 /*     threshold, in order to limit the size of the error bound */
    eps = dlamch_("E");
    safmin = dlamch_("S");
 /* Computing MAX */
    d__2 = abs(d__[1]), d__3 = (d__1 = d__[k], abs(d__1));
    anorm = f2cmax(d__2,d__3);
    if (anorm == 0.) {
 	thresh = eps;
    } else {
 /* Computing MAX */
 	d__1 = eps * anorm;
 	thresh = f2cmax(d__1,safmin);
    }
    i__1 = k;
    for (i__ = 1; i__ <= i__1; ++i__) {
 /* Computing MAX */
 	d__1 = sep[i__];
 	sep[i__] = f2cmax(d__1,thresh);
 /* L30: */
    }
    return 0;
 /*     End of DDISNA */
 } /* ddisna_ */
--- a/lapack-netlib/SRC/dgbbrd.c
+++ b/lapack-netlib/SRC/dgbbrd.c
--- a/lapack-netlib/SRC/dgbcon.c
+++ b/lapack-netlib/SRC/dgbcon.c
@@ -0,0 +1,725 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 /* > \brief \b DGBCON */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGBCON + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbcon.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbcon.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbcon.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, */
 /*                          WORK, IWORK, INFO ) */
 /*       CHARACTER          NORM */
 /*       INTEGER            INFO, KL, KU, LDAB, N */
 /*       DOUBLE PRECISION   ANORM, RCOND */
 /*       INTEGER            IPIV( * ), IWORK( * ) */
 /*       DOUBLE PRECISION   AB( LDAB, * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGBCON estimates the reciprocal of the condition number of a real */
 /* > general band matrix A, in either the 1-norm or the infinity-norm, */
 /* > using the LU factorization computed by DGBTRF. */
 /* > */
 /* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */
 /* > condition number is computed as */
 /* >    RCOND = 1 / ( norm(A) * norm(inv(A)) ). */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] NORM */
 /* > \verbatim */
 /* >          NORM is CHARACTER*1 */
 /* >          Specifies whether the 1-norm condition number or the */
 /* >          infinity-norm condition number is required: */
 /* >          = '1' or 'O':  1-norm; */
 /* >          = 'I':         Infinity-norm. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KL */
 /* > \verbatim */
 /* >          KL is INTEGER */
 /* >          The number of subdiagonals within the band of A.  KL >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KU */
 /* > \verbatim */
 /* >          KU is INTEGER */
 /* >          The number of superdiagonals within the band of A.  KU >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AB */
 /* > \verbatim */
 /* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
 /* >          Details of the LU factorization of the band matrix A, as */
 /* >          computed by DGBTRF.  U is stored as an upper triangular band */
 /* >          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */
 /* >          the multipliers used during the factorization are stored in */
 /* >          rows KL+KU+2 to 2*KL+KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices; for 1 <= i <= N, row i of the matrix was */
 /* >          interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ANORM */
 /* > \verbatim */
 /* >          ANORM is DOUBLE PRECISION */
 /* >          If NORM = '1' or 'O', the 1-norm of the original matrix A. */
 /* >          If NORM = 'I', the infinity-norm of the original matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is DOUBLE PRECISION */
 /* >          The reciprocal of the condition number of the matrix A, */
 /* >          computed as RCOND = 1/(norm(A) * norm(inv(A))). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (3*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGBcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgbcon_(char *norm, integer *n, integer *kl, integer *ku,
 	 doublereal *ab, integer *ldab, integer *ipiv, doublereal *anorm, 
 	doublereal *rcond, doublereal *work, integer *iwork, integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, i__1, i__2, i__3;
    doublereal d__1;
    /* Local variables */
    integer kase;
    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, 
 	    integer *);
    integer kase1, j;
    doublereal t, scale;
    extern logical lsame_(char *, char *);
    integer isave[3];
    extern /* Subroutine */ int drscl_(integer *, doublereal *, doublereal *, 
 	    integer *);
    logical lnoti;
    extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, 
 	    integer *, doublereal *, integer *), dlacn2_(integer *, 
 	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
 	    integer *);
    integer kd;
    extern doublereal dlamch_(char *);
    integer lm, jp, ix;
    extern integer idamax_(integer *, doublereal *, integer *);
    extern /* Subroutine */ int dlatbs_(char *, char *, char *, char *, 
 	    integer *, integer *, doublereal *, integer *, doublereal *, 
 	    doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen);
    doublereal ainvnm;
    logical onenrm;
    char normin[1];
    doublereal smlnum;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters. */
    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --ipiv;
    --work;
    --iwork;
    /* Function Body */
    *info = 0;
    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
    if (! onenrm && ! lsame_(norm, "I")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*kl < 0) {
 	*info = -3;
    } else if (*ku < 0) {
 	*info = -4;
    } else if (*ldab < (*kl << 1) + *ku + 1) {
 	*info = -6;
    } else if (*anorm < 0.) {
 	*info = -8;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGBCON", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Quick return if possible */
    *rcond = 0.;
    if (*n == 0) {
 	*rcond = 1.;
 	return 0;
    } else if (*anorm == 0.) {
 	return 0;
    }
    smlnum = dlamch_("Safe minimum");
 /*     Estimate the norm of inv(A). */
    ainvnm = 0.;
    *(unsigned char *)normin = 'N';
    if (onenrm) {
 	kase1 = 1;
    } else {
 	kase1 = 2;
    }
    kd = *kl + *ku + 1;
    lnoti = *kl > 0;
    kase = 0;
 L10:
    dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave);
    if (kase != 0) {
 	if (kase == kase1) {
 /*           Multiply by inv(L). */
 	    if (lnoti) {
 		i__1 = *n - 1;
 		for (j = 1; j <= i__1; ++j) {
 /* Computing MIN */
 		    i__2 = *kl, i__3 = *n - j;
 		    lm = f2cmin(i__2,i__3);
 		    jp = ipiv[j];
 		    t = work[jp];
 		    if (jp != j) {
 			work[jp] = work[j];
 			work[j] = t;
 		    }
 		    d__1 = -t;
 		    daxpy_(&lm, &d__1, &ab[kd + 1 + j * ab_dim1], &c__1, &
 			    work[j + 1], &c__1);
 /* L20: */
 		}
 	    }
 /*           Multiply by inv(U). */
 	    i__1 = *kl + *ku;
 	    dlatbs_("Upper", "No transpose", "Non-unit", normin, n, &i__1, &
 		    ab[ab_offset], ldab, &work[1], &scale, &work[(*n << 1) + 
 		    1], info);
 	} else {
 /*           Multiply by inv(U**T). */
 	    i__1 = *kl + *ku;
 	    dlatbs_("Upper", "Transpose", "Non-unit", normin, n, &i__1, &ab[
 		    ab_offset], ldab, &work[1], &scale, &work[(*n << 1) + 1], 
 		    info);
 /*           Multiply by inv(L**T). */
 	    if (lnoti) {
 		for (j = *n - 1; j >= 1; --j) {
 /* Computing MIN */
 		    i__1 = *kl, i__2 = *n - j;
 		    lm = f2cmin(i__1,i__2);
 		    work[j] -= ddot_(&lm, &ab[kd + 1 + j * ab_dim1], &c__1, &
 			    work[j + 1], &c__1);
 		    jp = ipiv[j];
 		    if (jp != j) {
 			t = work[jp];
 			work[jp] = work[j];
 			work[j] = t;
 		    }
 /* L30: */
 		}
 	    }
 	}
 /*        Divide X by 1/SCALE if doing so will not cause overflow. */
 	*(unsigned char *)normin = 'Y';
 	if (scale != 1.) {
 	    ix = idamax_(n, &work[1], &c__1);
 	    if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) 
 		    {
 		goto L40;
 	    }
 	    drscl_(n, &scale, &work[1], &c__1);
 	}
 	goto L10;
    }
 /*     Compute the estimate of the reciprocal condition number. */
    if (ainvnm != 0.) {
 	*rcond = 1. / ainvnm / *anorm;
    }
 L40:
    return 0;
 /*     End of DGBCON */
 } /* dgbcon_ */
--- a/lapack-netlib/SRC/dgbequ.c
+++ b/lapack-netlib/SRC/dgbequ.c
@@ -0,0 +1,764 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b DGBEQU */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGBEQU + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbequ.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbequ.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbequ.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, */
 /*                          AMAX, INFO ) */
 /*       INTEGER            INFO, KL, KU, LDAB, M, N */
 /*       DOUBLE PRECISION   AMAX, COLCND, ROWCND */
 /*       DOUBLE PRECISION   AB( LDAB, * ), C( * ), R( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGBEQU computes row and column scalings intended to equilibrate an */
 /* > M-by-N band matrix A and reduce its condition number.  R returns the */
 /* > row scale factors and C the column scale factors, chosen to try to */
 /* > make the largest element in each row and column of the matrix B with */
 /* > elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. */
 /* > */
 /* > R(i) and C(j) are restricted to be between SMLNUM = smallest safe */
 /* > number and BIGNUM = largest safe number.  Use of these scaling */
 /* > factors is not guaranteed to reduce the condition number of A but */
 /* > works well in practice. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KL */
 /* > \verbatim */
 /* >          KL is INTEGER */
 /* >          The number of subdiagonals within the band of A.  KL >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KU */
 /* > \verbatim */
 /* >          KU is INTEGER */
 /* >          The number of superdiagonals within the band of A.  KU >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AB */
 /* > \verbatim */
 /* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
 /* >          The band matrix A, stored in rows 1 to KL+KU+1.  The j-th */
 /* >          column of A is stored in the j-th column of the array AB as */
 /* >          follows: */
 /* >          AB(ku+1+i-j,j) = A(i,j) for f2cmax(1,j-ku)<=i<=f2cmin(m,j+kl). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KL+KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] R */
 /* > \verbatim */
 /* >          R is DOUBLE PRECISION array, dimension (M) */
 /* >          If INFO = 0, or INFO > M, R contains the row scale factors */
 /* >          for A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] C */
 /* > \verbatim */
 /* >          C is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0, C contains the column scale factors for A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] ROWCND */
 /* > \verbatim */
 /* >          ROWCND is DOUBLE PRECISION */
 /* >          If INFO = 0 or INFO > M, ROWCND contains the ratio of the */
 /* >          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and */
 /* >          AMAX is neither too large nor too small, it is not worth */
 /* >          scaling by R. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] COLCND */
 /* > \verbatim */
 /* >          COLCND is DOUBLE PRECISION */
 /* >          If INFO = 0, COLCND contains the ratio of the smallest */
 /* >          C(i) to the largest C(i).  If COLCND >= 0.1, it is not */
 /* >          worth scaling by C. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] AMAX */
 /* > \verbatim */
 /* >          AMAX is DOUBLE PRECISION */
 /* >          Absolute value of largest matrix element.  If AMAX is very */
 /* >          close to overflow or very close to underflow, the matrix */
 /* >          should be scaled. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, and i is */
 /* >                <= M:  the i-th row of A is exactly zero */
 /* >                >  M:  the (i-M)-th column of A is exactly zero */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGBcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgbequ_(integer *m, integer *n, integer *kl, integer *ku,
 	 doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, 
 	doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer *
 	info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4;
    doublereal d__1, d__2, d__3;
    /* Local variables */
    integer i__, j;
    doublereal rcmin, rcmax;
    integer kd;
    extern doublereal dlamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    doublereal bignum, smlnum;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters */
    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --r__;
    --c__;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*kl < 0) {
 	*info = -3;
    } else if (*ku < 0) {
 	*info = -4;
    } else if (*ldab < *kl + *ku + 1) {
 	*info = -6;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGBEQU", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Quick return if possible */
    if (*m == 0 || *n == 0) {
 	*rowcnd = 1.;
 	*colcnd = 1.;
 	*amax = 0.;
 	return 0;
    }
 /*     Get machine constants. */
    smlnum = dlamch_("S");
    bignum = 1. / smlnum;
 /*     Compute row scale factors. */
    i__1 = *m;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	r__[i__] = 0.;
 /* L10: */
    }
 /*     Find the maximum element in each row. */
    kd = *ku + 1;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 /* Computing MAX */
 	i__2 = j - *ku;
 /* Computing MIN */
 	i__4 = j + *kl;
 	i__3 = f2cmin(i__4,*m);
 	for (i__ = f2cmax(i__2,1); i__ <= i__3; ++i__) {
 /* Computing MAX */
 	    d__2 = r__[i__], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], 
 		    abs(d__1));
 	    r__[i__] = f2cmax(d__2,d__3);
 /* L20: */
 	}
 /* L30: */
    }
 /*     Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    i__1 = *m;
    for (i__ = 1; i__ <= i__1; ++i__) {
 /* Computing MAX */
 	d__1 = rcmax, d__2 = r__[i__];
 	rcmax = f2cmax(d__1,d__2);
 /* Computing MIN */
 	d__1 = rcmin, d__2 = r__[i__];
 	rcmin = f2cmin(d__1,d__2);
 /* L40: */
    }
    *amax = rcmax;
    if (rcmin == 0.) {
 /*        Find the first zero scale factor and return an error code. */
 	i__1 = *m;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    if (r__[i__] == 0.) {
 		*info = i__;
 		return 0;
 	    }
 /* L50: */
 	}
    } else {
 /*        Invert the scale factors. */
 	i__1 = *m;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 /* Computing MIN */
 /* Computing MAX */
 	    d__2 = r__[i__];
 	    d__1 = f2cmax(d__2,smlnum);
 	    r__[i__] = 1. / f2cmin(d__1,bignum);
 /* L60: */
 	}
 /*        Compute ROWCND = f2cmin(R(I)) / f2cmax(R(I)) */
 	*rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
    }
 /*     Compute column scale factors */
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 	c__[j] = 0.;
 /* L70: */
    }
 /*     Find the maximum element in each column, */
 /*     assuming the row scaling computed above. */
    kd = *ku + 1;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 /* Computing MAX */
 	i__3 = j - *ku;
 /* Computing MIN */
 	i__4 = j + *kl;
 	i__2 = f2cmin(i__4,*m);
 	for (i__ = f2cmax(i__3,1); i__ <= i__2; ++i__) {
 /* Computing MAX */
 	    d__2 = c__[j], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs(
 		    d__1)) * r__[i__];
 	    c__[j] = f2cmax(d__2,d__3);
 /* L80: */
 	}
 /* L90: */
    }
 /*     Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 /* Computing MIN */
 	d__1 = rcmin, d__2 = c__[j];
 	rcmin = f2cmin(d__1,d__2);
 /* Computing MAX */
 	d__1 = rcmax, d__2 = c__[j];
 	rcmax = f2cmax(d__1,d__2);
 /* L100: */
    }
    if (rcmin == 0.) {
 /*        Find the first zero scale factor and return an error code. */
 	i__1 = *n;
 	for (j = 1; j <= i__1; ++j) {
 	    if (c__[j] == 0.) {
 		*info = *m + j;
 		return 0;
 	    }
 /* L110: */
 	}
    } else {
 /*        Invert the scale factors. */
 	i__1 = *n;
 	for (j = 1; j <= i__1; ++j) {
 /* Computing MIN */
 /* Computing MAX */
 	    d__2 = c__[j];
 	    d__1 = f2cmax(d__2,smlnum);
 	    c__[j] = 1. / f2cmin(d__1,bignum);
 /* L120: */
 	}
 /*        Compute COLCND = f2cmin(C(J)) / f2cmax(C(J)) */
 	*colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
    }
    return 0;
 /*     End of DGBEQU */
 } /* dgbequ_ */
--- a/lapack-netlib/SRC/dgbequb.c
+++ b/lapack-netlib/SRC/dgbequb.c
@@ -0,0 +1,783 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b DGBEQUB */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGBEQUB + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbequb
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbequb
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbequb
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGBEQUB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, */
 /*                           AMAX, INFO ) */
 /*       INTEGER            INFO, KL, KU, LDAB, M, N */
 /*       DOUBLE PRECISION   AMAX, COLCND, ROWCND */
 /*       DOUBLE PRECISION   AB( LDAB, * ), C( * ), R( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGBEQUB computes row and column scalings intended to equilibrate an */
 /* > M-by-N matrix A and reduce its condition number.  R returns the row */
 /* > scale factors and C the column scale factors, chosen to try to make */
 /* > the largest element in each row and column of the matrix B with */
 /* > elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most */
 /* > the radix. */
 /* > */
 /* > R(i) and C(j) are restricted to be a power of the radix between */
 /* > SMLNUM = smallest safe number and BIGNUM = largest safe number.  Use */
 /* > of these scaling factors is not guaranteed to reduce the condition */
 /* > number of A but works well in practice. */
 /* > */
 /* > This routine differs from DGEEQU by restricting the scaling factors */
 /* > to a power of the radix.  Barring over- and underflow, scaling by */
 /* > these factors introduces no additional rounding errors.  However, the */
 /* > scaled entries' magnitudes are no longer approximately 1 but lie */
 /* > between sqrt(radix) and 1/sqrt(radix). */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KL */
 /* > \verbatim */
 /* >          KL is INTEGER */
 /* >          The number of subdiagonals within the band of A.  KL >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KU */
 /* > \verbatim */
 /* >          KU is INTEGER */
 /* >          The number of superdiagonals within the band of A.  KU >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AB */
 /* > \verbatim */
 /* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
 /* >          On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */
 /* >          The j-th column of A is stored in the j-th column of the */
 /* >          array AB as follows: */
 /* >          AB(KU+1+i-j,j) = A(i,j) for f2cmax(1,j-KU)<=i<=f2cmin(N,j+kl) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array A.  LDAB >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] R */
 /* > \verbatim */
 /* >          R is DOUBLE PRECISION array, dimension (M) */
 /* >          If INFO = 0 or INFO > M, R contains the row scale factors */
 /* >          for A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] C */
 /* > \verbatim */
 /* >          C is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0,  C contains the column scale factors for A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] ROWCND */
 /* > \verbatim */
 /* >          ROWCND is DOUBLE PRECISION */
 /* >          If INFO = 0 or INFO > M, ROWCND contains the ratio of the */
 /* >          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and */
 /* >          AMAX is neither too large nor too small, it is not worth */
 /* >          scaling by R. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] COLCND */
 /* > \verbatim */
 /* >          COLCND is DOUBLE PRECISION */
 /* >          If INFO = 0, COLCND contains the ratio of the smallest */
 /* >          C(i) to the largest C(i).  If COLCND >= 0.1, it is not */
 /* >          worth scaling by C. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] AMAX */
 /* > \verbatim */
 /* >          AMAX is DOUBLE PRECISION */
 /* >          Absolute value of largest matrix element.  If AMAX is very */
 /* >          close to overflow or very close to underflow, the matrix */
 /* >          should be scaled. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i,  and i is */
 /* >                <= M:  the i-th row of A is exactly zero */
 /* >                >  M:  the (i-M)-th column of A is exactly zero */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGBcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgbequb_(integer *m, integer *n, integer *kl, integer *
 	ku, doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, 
 	doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer *
 	info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4;
    doublereal d__1, d__2, d__3;
    /* Local variables */
    integer i__, j;
    doublereal radix, rcmin, rcmax;
    integer kd;
    extern doublereal dlamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    doublereal bignum, logrdx, smlnum;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters. */
    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --r__;
    --c__;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*kl < 0) {
 	*info = -3;
    } else if (*ku < 0) {
 	*info = -4;
    } else if (*ldab < *kl + *ku + 1) {
 	*info = -6;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGBEQUB", &i__1, (ftnlen)7);
 	return 0;
    }
 /*     Quick return if possible. */
    if (*m == 0 || *n == 0) {
 	*rowcnd = 1.;
 	*colcnd = 1.;
 	*amax = 0.;
 	return 0;
    }
 /*     Get machine constants.  Assume SMLNUM is a power of the radix. */
    smlnum = dlamch_("S");
    bignum = 1. / smlnum;
    radix = dlamch_("B");
    logrdx = log(radix);
 /*     Compute row scale factors. */
    i__1 = *m;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	r__[i__] = 0.;
 /* L10: */
    }
 /*     Find the maximum element in each row. */
    kd = *ku + 1;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 /* Computing MAX */
 	i__2 = j - *ku;
 /* Computing MIN */
 	i__4 = j + *kl;
 	i__3 = f2cmin(i__4,*m);
 	for (i__ = f2cmax(i__2,1); i__ <= i__3; ++i__) {
 /* Computing MAX */
 	    d__2 = r__[i__], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], 
 		    abs(d__1));
 	    r__[i__] = f2cmax(d__2,d__3);
 /* L20: */
 	}
 /* L30: */
    }
    i__1 = *m;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	if (r__[i__] > 0.) {
 	    i__3 = (integer) (log(r__[i__]) / logrdx);
 	    r__[i__] = pow_di(&radix, &i__3);
 	}
    }
 /*     Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    i__1 = *m;
    for (i__ = 1; i__ <= i__1; ++i__) {
 /* Computing MAX */
 	d__1 = rcmax, d__2 = r__[i__];
 	rcmax = f2cmax(d__1,d__2);
 /* Computing MIN */
 	d__1 = rcmin, d__2 = r__[i__];
 	rcmin = f2cmin(d__1,d__2);
 /* L40: */
    }
    *amax = rcmax;
    if (rcmin == 0.) {
 /*        Find the first zero scale factor and return an error code. */
 	i__1 = *m;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    if (r__[i__] == 0.) {
 		*info = i__;
 		return 0;
 	    }
 /* L50: */
 	}
    } else {
 /*        Invert the scale factors. */
 	i__1 = *m;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 /* Computing MIN */
 /* Computing MAX */
 	    d__2 = r__[i__];
 	    d__1 = f2cmax(d__2,smlnum);
 	    r__[i__] = 1. / f2cmin(d__1,bignum);
 /* L60: */
 	}
 /*        Compute ROWCND = f2cmin(R(I)) / f2cmax(R(I)). */
 	*rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
    }
 /*     Compute column scale factors. */
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 	c__[j] = 0.;
 /* L70: */
    }
 /*     Find the maximum element in each column, */
 /*     assuming the row scaling computed above. */
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 /* Computing MAX */
 	i__3 = j - *ku;
 /* Computing MIN */
 	i__4 = j + *kl;
 	i__2 = f2cmin(i__4,*m);
 	for (i__ = f2cmax(i__3,1); i__ <= i__2; ++i__) {
 /* Computing MAX */
 	    d__2 = c__[j], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs(
 		    d__1)) * r__[i__];
 	    c__[j] = f2cmax(d__2,d__3);
 /* L80: */
 	}
 	if (c__[j] > 0.) {
 	    i__2 = (integer) (log(c__[j]) / logrdx);
 	    c__[j] = pow_di(&radix, &i__2);
 	}
 /* L90: */
    }
 /*     Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 /* Computing MIN */
 	d__1 = rcmin, d__2 = c__[j];
 	rcmin = f2cmin(d__1,d__2);
 /* Computing MAX */
 	d__1 = rcmax, d__2 = c__[j];
 	rcmax = f2cmax(d__1,d__2);
 /* L100: */
    }
    if (rcmin == 0.) {
 /*        Find the first zero scale factor and return an error code. */
 	i__1 = *n;
 	for (j = 1; j <= i__1; ++j) {
 	    if (c__[j] == 0.) {
 		*info = *m + j;
 		return 0;
 	    }
 /* L110: */
 	}
    } else {
 /*        Invert the scale factors. */
 	i__1 = *n;
 	for (j = 1; j <= i__1; ++j) {
 /* Computing MIN */
 /* Computing MAX */
 	    d__2 = c__[j];
 	    d__1 = f2cmax(d__2,smlnum);
 	    c__[j] = 1. / f2cmin(d__1,bignum);
 /* L120: */
 	}
 /*        Compute COLCND = f2cmin(C(J)) / f2cmax(C(J)). */
 	*colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
    }
    return 0;
 /*     End of DGBEQUB */
 } /* dgbequb_ */
--- a/lapack-netlib/SRC/dgbrfs.c
+++ b/lapack-netlib/SRC/dgbrfs.c
@@ -0,0 +1,919 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static doublereal c_b15 = -1.;
 static doublereal c_b17 = 1.;
 /* > \brief \b DGBRFS */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGBRFS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbrfs.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbrfs.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbrfs.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, */
 /*                          IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, */
 /*                          INFO ) */
 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS */
 /*       INTEGER            IPIV( * ), IWORK( * ) */
 /*       DOUBLE PRECISION   AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), */
 /*      $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGBRFS improves the computed solution to a system of linear */
 /* > equations when the coefficient matrix is banded, and provides */
 /* > error bounds and backward error estimates for the solution. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          Specifies the form of the system of equations: */
 /* >          = 'N':  A * X = B     (No transpose) */
 /* >          = 'T':  A**T * X = B  (Transpose) */
 /* >          = 'C':  A**H * X = B  (Conjugate transpose = Transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KL */
 /* > \verbatim */
 /* >          KL is INTEGER */
 /* >          The number of subdiagonals within the band of A.  KL >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KU */
 /* > \verbatim */
 /* >          KU is INTEGER */
 /* >          The number of superdiagonals within the band of A.  KU >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrices B and X.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AB */
 /* > \verbatim */
 /* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
 /* >          The original band matrix A, stored in rows 1 to KL+KU+1. */
 /* >          The j-th column of A is stored in the j-th column of the */
 /* >          array AB as follows: */
 /* >          AB(ku+1+i-j,j) = A(i,j) for f2cmax(1,j-ku)<=i<=f2cmin(n,j+kl). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= KL+KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AFB */
 /* > \verbatim */
 /* >          AFB is DOUBLE PRECISION array, dimension (LDAFB,N) */
 /* >          Details of the LU factorization of the band matrix A, as */
 /* >          computed by DGBTRF.  U is stored as an upper triangular band */
 /* >          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */
 /* >          the multipliers used during the factorization are stored in */
 /* >          rows KL+KU+2 to 2*KL+KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAFB */
 /* > \verbatim */
 /* >          LDAFB is INTEGER */
 /* >          The leading dimension of the array AFB.  LDAFB >= 2*KL*KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices from DGBTRF; for 1<=i<=N, row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] B */
 /* > \verbatim */
 /* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
 /* >          The right hand side matrix B. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] X */
 /* > \verbatim */
 /* >          X is DOUBLE PRECISION array, dimension (LDX,NRHS) */
 /* >          On entry, the solution matrix X, as computed by DGBTRS. */
 /* >          On exit, the improved solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDX */
 /* > \verbatim */
 /* >          LDX is INTEGER */
 /* >          The leading dimension of the array X.  LDX >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] FERR */
 /* > \verbatim */
 /* >          FERR is DOUBLE PRECISION array, dimension (NRHS) */
 /* >          The estimated forward error bound for each solution vector */
 /* >          X(j) (the j-th column of the solution matrix X). */
 /* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
 /* >          is an estimated upper bound for the magnitude of the largest */
 /* >          element in (X(j) - XTRUE) divided by the magnitude of the */
 /* >          largest element in X(j).  The estimate is as reliable as */
 /* >          the estimate for RCOND, and is almost always a slight */
 /* >          overestimate of the true error. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] BERR */
 /* > \verbatim */
 /* >          BERR is DOUBLE PRECISION array, dimension (NRHS) */
 /* >          The componentwise relative backward error of each solution */
 /* >          vector X(j) (i.e., the smallest relative change in */
 /* >          any element of A or B that makes X(j) an exact solution). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (3*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /* > \par Internal Parameters: */
 /*  ========================= */
 /* > */
 /* > \verbatim */
 /* >  ITMAX is the maximum number of steps of iterative refinement. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGBcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgbrfs_(char *trans, integer *n, integer *kl, integer *
 	ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, 
 	integer *ldafb, integer *ipiv, doublereal *b, integer *ldb, 
 	doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, 
 	doublereal *work, integer *iwork, integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, 
 	    x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
    doublereal d__1, d__2, d__3;
    /* Local variables */
    integer kase;
    doublereal safe1, safe2;
    integer i__, j, k;
    doublereal s;
    extern /* Subroutine */ int dgbmv_(char *, integer *, integer *, integer *
 	    , integer *, doublereal *, doublereal *, integer *, doublereal *, 
 	    integer *, doublereal *, doublereal *, integer *);
    extern logical lsame_(char *, char *);
    integer isave[3];
    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
 	    doublereal *, integer *), daxpy_(integer *, doublereal *, 
 	    doublereal *, integer *, doublereal *, integer *);
    integer count;
    extern /* Subroutine */ int dlacn2_(integer *, doublereal *, doublereal *,
 	     integer *, doublereal *, integer *, integer *);
    integer kk;
    extern doublereal dlamch_(char *);
    doublereal xk;
    integer nz;
    doublereal safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dgbtrs_(
 	    char *, integer *, integer *, integer *, integer *, doublereal *, 
 	    integer *, integer *, doublereal *, integer *, integer *);
    logical notran;
    char transt[1];
    doublereal lstres, eps;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters. */
    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    afb_dim1 = *ldafb;
    afb_offset = 1 + afb_dim1 * 1;
    afb -= afb_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --iwork;
    /* Function Body */
    *info = 0;
    notran = lsame_(trans, "N");
    if (! notran && ! lsame_(trans, "T") && ! lsame_(
 	    trans, "C")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*kl < 0) {
 	*info = -3;
    } else if (*ku < 0) {
 	*info = -4;
    } else if (*nrhs < 0) {
 	*info = -5;
    } else if (*ldab < *kl + *ku + 1) {
 	*info = -7;
    } else if (*ldafb < (*kl << 1) + *ku + 1) {
 	*info = -9;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -12;
    } else if (*ldx < f2cmax(1,*n)) {
 	*info = -14;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGBRFS", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Quick return if possible */
    if (*n == 0 || *nrhs == 0) {
 	i__1 = *nrhs;
 	for (j = 1; j <= i__1; ++j) {
 	    ferr[j] = 0.;
 	    berr[j] = 0.;
 /* L10: */
 	}
 	return 0;
    }
    if (notran) {
 	*(unsigned char *)transt = 'T';
    } else {
 	*(unsigned char *)transt = 'N';
    }
 /*     NZ = maximum number of nonzero elements in each row of A, plus 1 */
 /* Computing MIN */
    i__1 = *kl + *ku + 2, i__2 = *n + 1;
    nz = f2cmin(i__1,i__2);
    eps = dlamch_("Epsilon");
    safmin = dlamch_("Safe minimum");
    safe1 = nz * safmin;
    safe2 = safe1 / eps;
 /*     Do for each right hand side */
    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
 	count = 1;
 	lstres = 3.;
 L20:
 /*        Loop until stopping criterion is satisfied. */
 /*        Compute residual R = B - op(A) * X, */
 /*        where op(A) = A, A**T, or A**H, depending on TRANS. */
 	dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1);
 	dgbmv_(trans, n, n, kl, ku, &c_b15, &ab[ab_offset], ldab, &x[j * 
 		x_dim1 + 1], &c__1, &c_b17, &work[*n + 1], &c__1);
 /*        Compute componentwise relative backward error from formula */
 /*        f2cmax(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */
 /*        where abs(Z) is the componentwise absolute value of the matrix */
 /*        or vector Z.  If the i-th component of the denominator is less */
 /*        than SAFE2, then SAFE1 is added to the i-th components of the */
 /*        numerator and denominator before dividing. */
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1));
 /* L30: */
 	}
 /*        Compute abs(op(A))*abs(X) + abs(B). */
 	if (notran) {
 	    i__2 = *n;
 	    for (k = 1; k <= i__2; ++k) {
 		kk = *ku + 1 - k;
 		xk = (d__1 = x[k + j * x_dim1], abs(d__1));
 /* Computing MAX */
 		i__3 = 1, i__4 = k - *ku;
 /* Computing MIN */
 		i__6 = *n, i__7 = k + *kl;
 		i__5 = f2cmin(i__6,i__7);
 		for (i__ = f2cmax(i__3,i__4); i__ <= i__5; ++i__) {
 		    work[i__] += (d__1 = ab[kk + i__ + k * ab_dim1], abs(d__1)
 			    ) * xk;
 /* L40: */
 		}
 /* L50: */
 	    }
 	} else {
 	    i__2 = *n;
 	    for (k = 1; k <= i__2; ++k) {
 		s = 0.;
 		kk = *ku + 1 - k;
 /* Computing MAX */
 		i__5 = 1, i__3 = k - *ku;
 /* Computing MIN */
 		i__6 = *n, i__7 = k + *kl;
 		i__4 = f2cmin(i__6,i__7);
 		for (i__ = f2cmax(i__5,i__3); i__ <= i__4; ++i__) {
 		    s += (d__1 = ab[kk + i__ + k * ab_dim1], abs(d__1)) * (
 			    d__2 = x[i__ + j * x_dim1], abs(d__2));
 /* L60: */
 		}
 		work[k] += s;
 /* L70: */
 	    }
 	}
 	s = 0.;
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    if (work[i__] > safe2) {
 /* Computing MAX */
 		d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[
 			i__];
 		s = f2cmax(d__2,d__3);
 	    } else {
 /* Computing MAX */
 		d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) 
 			/ (work[i__] + safe1);
 		s = f2cmax(d__2,d__3);
 	    }
 /* L80: */
 	}
 	berr[j] = s;
 /*        Test stopping criterion. Continue iterating if */
 /*           1) The residual BERR(J) is larger than machine epsilon, and */
 /*           2) BERR(J) decreased by at least a factor of 2 during the */
 /*              last iteration, and */
 /*           3) At most ITMAX iterations tried. */
 	if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) {
 /*           Update solution and try again. */
 	    dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1]
 		    , &work[*n + 1], n, info);
 	    daxpy_(n, &c_b17, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1)
 		    ;
 	    lstres = berr[j];
 	    ++count;
 	    goto L20;
 	}
 /*        Bound error from formula */
 /*        norm(X - XTRUE) / norm(X) .le. FERR = */
 /*        norm( abs(inv(op(A)))* */
 /*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */
 /*        where */
 /*          norm(Z) is the magnitude of the largest component of Z */
 /*          inv(op(A)) is the inverse of op(A) */
 /*          abs(Z) is the componentwise absolute value of the matrix or */
 /*             vector Z */
 /*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
 /*          EPS is machine epsilon */
 /*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */
 /*        is incremented by SAFE1 if the i-th component of */
 /*        abs(op(A))*abs(X) + abs(B) is less than SAFE2. */
 /*        Use DLACN2 to estimate the infinity-norm of the matrix */
 /*           inv(op(A)) * diag(W), */
 /*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    if (work[i__] > safe2) {
 		work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * 
 			work[i__];
 	    } else {
 		work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * 
 			work[i__] + safe1;
 	    }
 /* L90: */
 	}
 	kase = 0;
 L100:
 	dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], &
 		kase, isave);
 	if (kase != 0) {
 	    if (kase == 1) {
 /*              Multiply by diag(W)*inv(op(A)**T). */
 		dgbtrs_(transt, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &
 			ipiv[1], &work[*n + 1], n, info);
 		i__2 = *n;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    work[*n + i__] *= work[i__];
 /* L110: */
 		}
 	    } else {
 /*              Multiply by inv(op(A))*diag(W). */
 		i__2 = *n;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    work[*n + i__] *= work[i__];
 /* L120: */
 		}
 		dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &
 			ipiv[1], &work[*n + 1], n, info);
 	    }
 	    goto L100;
 	}
 /*        Normalize error. */
 	lstres = 0.;
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 /* Computing MAX */
 	    d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1));
 	    lstres = f2cmax(d__2,d__3);
 /* L130: */
 	}
 	if (lstres != 0.) {
 	    ferr[j] /= lstres;
 	}
 /* L140: */
    }
    return 0;
 /*     End of DGBRFS */
 } /* dgbrfs_ */
--- a/lapack-netlib/SRC/dgbrfsx.c
+++ b/lapack-netlib/SRC/dgbrfsx.c
--- a/lapack-netlib/SRC/dgbsv.c
+++ b/lapack-netlib/SRC/dgbsv.c
@@ -0,0 +1,622 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief <b> DGBSV computes the solution to system of linear equations A * X = B for GB matrices</b> (simpl
 e driver) */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGBSV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbsv.f
 "> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbsv.f
 "> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbsv.f
 "> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO ) */
 /*       INTEGER            INFO, KL, KU, LDAB, LDB, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       DOUBLE PRECISION   AB( LDAB, * ), B( LDB, * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGBSV computes the solution to a real system of linear equations */
 /* > A * X = B, where A is a band matrix of order N with KL subdiagonals */
 /* > and KU superdiagonals, and X and B are N-by-NRHS matrices. */
 /* > */
 /* > The LU decomposition with partial pivoting and row interchanges is */
 /* > used to factor A as A = L * U, where L is a product of permutation */
 /* > and unit lower triangular matrices with KL subdiagonals, and U is */
 /* > upper triangular with KL+KU superdiagonals.  The factored form of A */
 /* > is then used to solve the system of equations A * X = B. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of linear equations, i.e., the order of the */
 /* >          matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KL */
 /* > \verbatim */
 /* >          KL is INTEGER */
 /* >          The number of subdiagonals within the band of A.  KL >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KU */
 /* > \verbatim */
 /* >          KU is INTEGER */
 /* >          The number of superdiagonals within the band of A.  KU >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AB */
 /* > \verbatim */
 /* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
 /* >          On entry, the matrix A in band storage, in rows KL+1 to */
 /* >          2*KL+KU+1; rows 1 to KL of the array need not be set. */
 /* >          The j-th column of A is stored in the j-th column of the */
 /* >          array AB as follows: */
 /* >          AB(KL+KU+1+i-j,j) = A(i,j) for f2cmax(1,j-KU)<=i<=f2cmin(N,j+KL) */
 /* >          On exit, details of the factorization: U is stored as an */
 /* >          upper triangular band matrix with KL+KU superdiagonals in */
 /* >          rows 1 to KL+KU+1, and the multipliers used during the */
 /* >          factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */
 /* >          See below for further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices that define the permutation matrix P; */
 /* >          row i of the matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
 /* >          On entry, the N-by-NRHS right hand side matrix B. */
 /* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization */
 /* >                has been completed, but the factor U is exactly */
 /* >                singular, and the solution has not been computed. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGBsolve */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The band storage scheme is illustrated by the following example, when */
 /* >  M = N = 6, KL = 2, KU = 1: */
 /* > */
 /* >  On entry:                       On exit: */
 /* > */
 /* >      *    *    *    +    +    +       *    *    *   u14  u25  u36 */
 /* >      *    *    +    +    +    +       *    *   u13  u24  u35  u46 */
 /* >      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56 */
 /* >     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66 */
 /* >     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   * */
 /* >     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    * */
 /* > */
 /* >  Array elements marked * are not used by the routine; elements marked */
 /* >  + need not be set on entry, but are required by the routine to store */
 /* >  elements of U because of fill-in resulting from the row interchanges. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgbsv_(integer *n, integer *kl, integer *ku, integer *
 	nrhs, doublereal *ab, integer *ldab, integer *ipiv, doublereal *b, 
 	integer *ldb, integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, b_dim1, b_offset, i__1;
    /* Local variables */
    extern /* Subroutine */ int dgbtrf_(integer *, integer *, integer *, 
 	    integer *, doublereal *, integer *, integer *, integer *), 
 	    xerbla_(char *, integer *, ftnlen), dgbtrs_(char *, integer *, 
 	    integer *, integer *, integer *, doublereal *, integer *, integer 
 	    *, doublereal *, integer *, integer *);
 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters. */
    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    /* Function Body */
    *info = 0;
    if (*n < 0) {
 	*info = -1;
    } else if (*kl < 0) {
 	*info = -2;
    } else if (*ku < 0) {
 	*info = -3;
    } else if (*nrhs < 0) {
 	*info = -4;
    } else if (*ldab < (*kl << 1) + *ku + 1) {
 	*info = -6;
    } else if (*ldb < f2cmax(*n,1)) {
 	*info = -9;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGBSV ", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Compute the LU factorization of the band matrix A. */
    dgbtrf_(n, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info);
    if (*info == 0) {
 /*        Solve the system A*X = B, overwriting B with X. */
 	dgbtrs_("No transpose", n, kl, ku, nrhs, &ab[ab_offset], ldab, &ipiv[
 		1], &b[b_offset], ldb, info);
    }
    return 0;
 /*     End of DGBSV */
 } /* dgbsv_ */
--- a/lapack-netlib/SRC/dgbsvx.c
+++ b/lapack-netlib/SRC/dgbsvx.c
--- a/lapack-netlib/SRC/dgbsvxx.c
+++ b/lapack-netlib/SRC/dgbsvxx.c
--- a/lapack-netlib/SRC/dgbtf2.c
+++ b/lapack-netlib/SRC/dgbtf2.c
@@ -0,0 +1,698 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static doublereal c_b9 = -1.;
 /* > \brief \b DGBTF2 computes the LU factorization of a general band matrix using the unblocked version of th
 e algorithm. */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGBTF2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbtf2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbtf2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbtf2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) */
 /*       INTEGER            INFO, KL, KU, LDAB, M, N */
 /*       INTEGER            IPIV( * ) */
 /*       DOUBLE PRECISION   AB( LDAB, * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGBTF2 computes an LU factorization of a real m-by-n band matrix A */
 /* > using partial pivoting with row interchanges. */
 /* > */
 /* > This is the unblocked version of the algorithm, calling Level 2 BLAS. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KL */
 /* > \verbatim */
 /* >          KL is INTEGER */
 /* >          The number of subdiagonals within the band of A.  KL >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KU */
 /* > \verbatim */
 /* >          KU is INTEGER */
 /* >          The number of superdiagonals within the band of A.  KU >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] AB */
 /* > \verbatim */
 /* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
 /* >          On entry, the matrix A in band storage, in rows KL+1 to */
 /* >          2*KL+KU+1; rows 1 to KL of the array need not be set. */
 /* >          The j-th column of A is stored in the j-th column of the */
 /* >          array AB as follows: */
 /* >          AB(kl+ku+1+i-j,j) = A(i,j) for f2cmax(1,j-ku)<=i<=f2cmin(m,j+kl) */
 /* > */
 /* >          On exit, details of the factorization: U is stored as an */
 /* >          upper triangular band matrix with KL+KU superdiagonals in */
 /* >          rows 1 to KL+KU+1, and the multipliers used during the */
 /* >          factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */
 /* >          See below for further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (f2cmin(M,N)) */
 /* >          The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization */
 /* >               has been completed, but the factor U is exactly */
 /* >               singular, and division by zero will occur if it is used */
 /* >               to solve a system of equations. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGBcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The band storage scheme is illustrated by the following example, when */
 /* >  M = N = 6, KL = 2, KU = 1: */
 /* > */
 /* >  On entry:                       On exit: */
 /* > */
 /* >      *    *    *    +    +    +       *    *    *   u14  u25  u36 */
 /* >      *    *    +    +    +    +       *    *   u13  u24  u35  u46 */
 /* >      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56 */
 /* >     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66 */
 /* >     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   * */
 /* >     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    * */
 /* > */
 /* >  Array elements marked * are not used by the routine; elements marked */
 /* >  + need not be set on entry, but are required by the routine to store */
 /* >  elements of U, because of fill-in resulting from the row */
 /* >  interchanges. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgbtf2_(integer *m, integer *n, integer *kl, integer *ku,
 	 doublereal *ab, integer *ldab, integer *ipiv, integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4;
    doublereal d__1;
    /* Local variables */
    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *);
    integer i__, j;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
 	    integer *), dswap_(integer *, doublereal *, integer *, doublereal 
 	    *, integer *);
    integer km, jp, ju, kv;
    extern integer idamax_(integer *, doublereal *, integer *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     KV is the number of superdiagonals in the factor U, allowing for */
 /*     fill-in. */
    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --ipiv;
    /* Function Body */
    kv = *ku + *kl;
 /*     Test the input parameters. */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*kl < 0) {
 	*info = -3;
    } else if (*ku < 0) {
 	*info = -4;
    } else if (*ldab < *kl + kv + 1) {
 	*info = -6;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGBTF2", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Quick return if possible */
    if (*m == 0 || *n == 0) {
 	return 0;
    }
 /*     Gaussian elimination with partial pivoting */
 /*     Set fill-in elements in columns KU+2 to KV to zero. */
    i__1 = f2cmin(kv,*n);
    for (j = *ku + 2; j <= i__1; ++j) {
 	i__2 = *kl;
 	for (i__ = kv - j + 2; i__ <= i__2; ++i__) {
 	    ab[i__ + j * ab_dim1] = 0.;
 /* L10: */
 	}
 /* L20: */
    }
 /*     JU is the index of the last column affected by the current stage */
 /*     of the factorization. */
    ju = 1;
    i__1 = f2cmin(*m,*n);
    for (j = 1; j <= i__1; ++j) {
 /*        Set fill-in elements in column J+KV to zero. */
 	if (j + kv <= *n) {
 	    i__2 = *kl;
 	    for (i__ = 1; i__ <= i__2; ++i__) {
 		ab[i__ + (j + kv) * ab_dim1] = 0.;
 /* L30: */
 	    }
 	}
 /*        Find pivot and test for singularity. KM is the number of */
 /*        subdiagonal elements in the current column. */
 /* Computing MIN */
 	i__2 = *kl, i__3 = *m - j;
 	km = f2cmin(i__2,i__3);
 	i__2 = km + 1;
 	jp = idamax_(&i__2, &ab[kv + 1 + j * ab_dim1], &c__1);
 	ipiv[j] = jp + j - 1;
 	if (ab[kv + jp + j * ab_dim1] != 0.) {
 /* Computing MAX */
 /* Computing MIN */
 	    i__4 = j + *ku + jp - 1;
 	    i__2 = ju, i__3 = f2cmin(i__4,*n);
 	    ju = f2cmax(i__2,i__3);
 /*           Apply interchange to columns J to JU. */
 	    if (jp != 1) {
 		i__2 = ju - j + 1;
 		i__3 = *ldab - 1;
 		i__4 = *ldab - 1;
 		dswap_(&i__2, &ab[kv + jp + j * ab_dim1], &i__3, &ab[kv + 1 + 
 			j * ab_dim1], &i__4);
 	    }
 	    if (km > 0) {
 /*              Compute multipliers. */
 		d__1 = 1. / ab[kv + 1 + j * ab_dim1];
 		dscal_(&km, &d__1, &ab[kv + 2 + j * ab_dim1], &c__1);
 /*              Update trailing submatrix within the band. */
 		if (ju > j) {
 		    i__2 = ju - j;
 		    i__3 = *ldab - 1;
 		    i__4 = *ldab - 1;
 		    dger_(&km, &i__2, &c_b9, &ab[kv + 2 + j * ab_dim1], &c__1,
 			     &ab[kv + (j + 1) * ab_dim1], &i__3, &ab[kv + 1 + 
 			    (j + 1) * ab_dim1], &i__4);
 		}
 	    }
 	} else {
 /*           If pivot is zero, set INFO to the index of the pivot */
 /*           unless a zero pivot has already been found. */
 	    if (*info == 0) {
 		*info = j;
 	    }
 	}
 /* L40: */
    }
    return 0;
 /*     End of DGBTF2 */
 } /* dgbtf2_ */
--- a/lapack-netlib/SRC/dgbtrf.c
+++ b/lapack-netlib/SRC/dgbtrf.c
--- a/lapack-netlib/SRC/dgbtrs.c
+++ b/lapack-netlib/SRC/dgbtrs.c
@@ -0,0 +1,686 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static doublereal c_b7 = -1.;
 static integer c__1 = 1;
 static doublereal c_b23 = 1.;
 /* > \brief \b DGBTRS */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGBTRS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbtrs.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbtrs.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbtrs.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, */
 /*                          INFO ) */
 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, KL, KU, LDAB, LDB, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       DOUBLE PRECISION   AB( LDAB, * ), B( LDB, * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGBTRS solves a system of linear equations */
 /* >    A * X = B  or  A**T * X = B */
 /* > with a general band matrix A using the LU factorization computed */
 /* > by DGBTRF. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          Specifies the form of the system of equations. */
 /* >          = 'N':  A * X = B  (No transpose) */
 /* >          = 'T':  A**T* X = B  (Transpose) */
 /* >          = 'C':  A**T* X = B  (Conjugate transpose = Transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KL */
 /* > \verbatim */
 /* >          KL is INTEGER */
 /* >          The number of subdiagonals within the band of A.  KL >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KU */
 /* > \verbatim */
 /* >          KU is INTEGER */
 /* >          The number of superdiagonals within the band of A.  KU >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AB */
 /* > \verbatim */
 /* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
 /* >          Details of the LU factorization of the band matrix A, as */
 /* >          computed by DGBTRF.  U is stored as an upper triangular band */
 /* >          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */
 /* >          the multipliers used during the factorization are stored in */
 /* >          rows KL+KU+2 to 2*KL+KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices; for 1 <= i <= N, row i of the matrix was */
 /* >          interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
 /* >          On entry, the right hand side matrix B. */
 /* >          On exit, the solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGBcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgbtrs_(char *trans, integer *n, integer *kl, integer *
 	ku, integer *nrhs, doublereal *ab, integer *ldab, integer *ipiv, 
 	doublereal *b, integer *ldb, integer *info)
 {
    /* System generated locals */
    integer ab_dim1, ab_offset, b_dim1, b_offset, i__1, i__2, i__3;
    /* Local variables */
    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *);
    integer i__, j, l;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dgemv_(char *, integer *, integer *, 
 	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
 	    doublereal *, doublereal *, integer *), dswap_(integer *, 
 	    doublereal *, integer *, doublereal *, integer *), dtbsv_(char *, 
 	    char *, char *, integer *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *);
    logical lnoti;
    integer kd, lm;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    logical notran;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters. */
    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    /* Function Body */
    *info = 0;
    notran = lsame_(trans, "N");
    if (! notran && ! lsame_(trans, "T") && ! lsame_(
 	    trans, "C")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*kl < 0) {
 	*info = -3;
    } else if (*ku < 0) {
 	*info = -4;
    } else if (*nrhs < 0) {
 	*info = -5;
    } else if (*ldab < (*kl << 1) + *ku + 1) {
 	*info = -7;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -10;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGBTRS", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Quick return if possible */
    if (*n == 0 || *nrhs == 0) {
 	return 0;
    }
    kd = *ku + *kl + 1;
    lnoti = *kl > 0;
    if (notran) {
 /*        Solve  A*X = B. */
 /*        Solve L*X = B, overwriting B with X. */
 /*        L is represented as a product of permutations and unit lower */
 /*        triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), */
 /*        where each transformation L(i) is a rank-one modification of */
 /*        the identity matrix. */
 	if (lnoti) {
 	    i__1 = *n - 1;
 	    for (j = 1; j <= i__1; ++j) {
 /* Computing MIN */
 		i__2 = *kl, i__3 = *n - j;
 		lm = f2cmin(i__2,i__3);
 		l = ipiv[j];
 		if (l != j) {
 		    dswap_(nrhs, &b[l + b_dim1], ldb, &b[j + b_dim1], ldb);
 		}
 		dger_(&lm, nrhs, &c_b7, &ab[kd + 1 + j * ab_dim1], &c__1, &b[
 			j + b_dim1], ldb, &b[j + 1 + b_dim1], ldb);
 /* L10: */
 	    }
 	}
 	i__1 = *nrhs;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 /*           Solve U*X = B, overwriting B with X. */
 	    i__2 = *kl + *ku;
 	    dtbsv_("Upper", "No transpose", "Non-unit", n, &i__2, &ab[
 		    ab_offset], ldab, &b[i__ * b_dim1 + 1], &c__1);
 /* L20: */
 	}
    } else {
 /*        Solve A**T*X = B. */
 	i__1 = *nrhs;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 /*           Solve U**T*X = B, overwriting B with X. */
 	    i__2 = *kl + *ku;
 	    dtbsv_("Upper", "Transpose", "Non-unit", n, &i__2, &ab[ab_offset],
 		     ldab, &b[i__ * b_dim1 + 1], &c__1);
 /* L30: */
 	}
 /*        Solve L**T*X = B, overwriting B with X. */
 	if (lnoti) {
 	    for (j = *n - 1; j >= 1; --j) {
 /* Computing MIN */
 		i__1 = *kl, i__2 = *n - j;
 		lm = f2cmin(i__1,i__2);
 		dgemv_("Transpose", &lm, nrhs, &c_b7, &b[j + 1 + b_dim1], ldb,
 			 &ab[kd + 1 + j * ab_dim1], &c__1, &c_b23, &b[j + 
 			b_dim1], ldb);
 		l = ipiv[j];
 		if (l != j) {
 		    dswap_(nrhs, &b[l + b_dim1], ldb, &b[j + b_dim1], ldb);
 		}
 /* L40: */
 	    }
 	}
    }
    return 0;
 /*     End of DGBTRS */
 } /* dgbtrs_ */
--- a/lapack-netlib/SRC/dgebak.c
+++ b/lapack-netlib/SRC/dgebak.c
@@ -0,0 +1,675 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b DGEBAK */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEBAK + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebak.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebak.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebak.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, */
 /*                          INFO ) */
 /*       CHARACTER          JOB, SIDE */
 /*       INTEGER            IHI, ILO, INFO, LDV, M, N */
 /*       DOUBLE PRECISION   SCALE( * ), V( LDV, * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEBAK forms the right or left eigenvectors of a real general matrix */
 /* > by backward transformation on the computed eigenvectors of the */
 /* > balanced matrix output by DGEBAL. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] JOB */
 /* > \verbatim */
 /* >          JOB is CHARACTER*1 */
 /* >          Specifies the type of backward transformation required: */
 /* >          = 'N': do nothing, return immediately; */
 /* >          = 'P': do backward transformation for permutation only; */
 /* >          = 'S': do backward transformation for scaling only; */
 /* >          = 'B': do backward transformations for both permutation and */
 /* >                 scaling. */
 /* >          JOB must be the same as the argument JOB supplied to DGEBAL. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] SIDE */
 /* > \verbatim */
 /* >          SIDE is CHARACTER*1 */
 /* >          = 'R':  V contains right eigenvectors; */
 /* >          = 'L':  V contains left eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of rows of the matrix V.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ILO */
 /* > \verbatim */
 /* >          ILO is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IHI */
 /* > \verbatim */
 /* >          IHI is INTEGER */
 /* >          The integers ILO and IHI determined by DGEBAL. */
 /* >          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] SCALE */
 /* > \verbatim */
 /* >          SCALE is DOUBLE PRECISION array, dimension (N) */
 /* >          Details of the permutation and scaling factors, as returned */
 /* >          by DGEBAL. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of columns of the matrix V.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] V */
 /* > \verbatim */
 /* >          V is DOUBLE PRECISION array, dimension (LDV,M) */
 /* >          On entry, the matrix of right or left eigenvectors to be */
 /* >          transformed, as returned by DHSEIN or DTREVC. */
 /* >          On exit, V is overwritten by the transformed eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDV */
 /* > \verbatim */
 /* >          LDV is INTEGER */
 /* >          The leading dimension of the array V. LDV >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgebak_(char *job, char *side, integer *n, integer *ilo, 
 	integer *ihi, doublereal *scale, integer *m, doublereal *v, integer *
 	ldv, integer *info)
 {
    /* System generated locals */
    integer v_dim1, v_offset, i__1;
    /* Local variables */
    integer i__, k;
    doublereal s;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
 	    integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, 
 	    doublereal *, integer *);
    logical leftv;
    integer ii;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    logical rightv;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Decode and Test the input parameters */
    /* Parameter adjustments */
    --scale;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1 * 1;
    v -= v_offset;
    /* Function Body */
    rightv = lsame_(side, "R");
    leftv = lsame_(side, "L");
    *info = 0;
    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") 
 	    && ! lsame_(job, "B")) {
 	*info = -1;
    } else if (! rightv && ! leftv) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) {
 	*info = -4;
    } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) {
 	*info = -5;
    } else if (*m < 0) {
 	*info = -7;
    } else if (*ldv < f2cmax(1,*n)) {
 	*info = -9;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEBAK", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Quick return if possible */
    if (*n == 0) {
 	return 0;
    }
    if (*m == 0) {
 	return 0;
    }
    if (lsame_(job, "N")) {
 	return 0;
    }
    if (*ilo == *ihi) {
 	goto L30;
    }
 /*     Backward balance */
    if (lsame_(job, "S") || lsame_(job, "B")) {
 	if (rightv) {
 	    i__1 = *ihi;
 	    for (i__ = *ilo; i__ <= i__1; ++i__) {
 		s = scale[i__];
 		dscal_(m, &s, &v[i__ + v_dim1], ldv);
 /* L10: */
 	    }
 	}
 	if (leftv) {
 	    i__1 = *ihi;
 	    for (i__ = *ilo; i__ <= i__1; ++i__) {
 		s = 1. / scale[i__];
 		dscal_(m, &s, &v[i__ + v_dim1], ldv);
 /* L20: */
 	    }
 	}
    }
 /*     Backward permutation */
 /*     For  I = ILO-1 step -1 until 1, */
 /*              IHI+1 step 1 until N do -- */
 L30:
    if (lsame_(job, "P") || lsame_(job, "B")) {
 	if (rightv) {
 	    i__1 = *n;
 	    for (ii = 1; ii <= i__1; ++ii) {
 		i__ = ii;
 		if (i__ >= *ilo && i__ <= *ihi) {
 		    goto L40;
 		}
 		if (i__ < *ilo) {
 		    i__ = *ilo - ii;
 		}
 		k = (integer) scale[i__];
 		if (k == i__) {
 		    goto L40;
 		}
 		dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
 L40:
 		;
 	    }
 	}
 	if (leftv) {
 	    i__1 = *n;
 	    for (ii = 1; ii <= i__1; ++ii) {
 		i__ = ii;
 		if (i__ >= *ilo && i__ <= *ihi) {
 		    goto L50;
 		}
 		if (i__ < *ilo) {
 		    i__ = *ilo - ii;
 		}
 		k = (integer) scale[i__];
 		if (k == i__) {
 		    goto L50;
 		}
 		dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
 L50:
 		;
 	    }
 	}
    }
    return 0;
 /*     End of DGEBAK */
 } /* dgebak_ */
--- a/lapack-netlib/SRC/dgebal.c
+++ b/lapack-netlib/SRC/dgebal.c
@@ -0,0 +1,840 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 /* > \brief \b DGEBAL */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEBAL + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebal.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebal.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebal.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) */
 /*       CHARACTER          JOB */
 /*       INTEGER            IHI, ILO, INFO, LDA, N */
 /*       DOUBLE PRECISION   A( LDA, * ), SCALE( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEBAL balances a general real matrix A.  This involves, first, */
 /* > permuting A by a similarity transformation to isolate eigenvalues */
 /* > in the first 1 to ILO-1 and last IHI+1 to N elements on the */
 /* > diagonal; and second, applying a diagonal similarity transformation */
 /* > to rows and columns ILO to IHI to make the rows and columns as */
 /* > close in norm as possible.  Both steps are optional. */
 /* > */
 /* > Balancing may reduce the 1-norm of the matrix, and improve the */
 /* > accuracy of the computed eigenvalues and/or eigenvectors. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] JOB */
 /* > \verbatim */
 /* >          JOB is CHARACTER*1 */
 /* >          Specifies the operations to be performed on A: */
 /* >          = 'N':  none:  simply set ILO = 1, IHI = N, SCALE(I) = 1.0 */
 /* >                  for i = 1,...,N; */
 /* >          = 'P':  permute only; */
 /* >          = 'S':  scale only; */
 /* >          = 'B':  both permute and scale. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the input matrix A. */
 /* >          On exit,  A is overwritten by the balanced matrix. */
 /* >          If JOB = 'N', A is not referenced. */
 /* >          See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] ILO */
 /* > \verbatim */
 /* >          ILO is INTEGER */
 /* > \endverbatim */
 /* > \param[out] IHI */
 /* > \verbatim */
 /* >          IHI is INTEGER */
 /* >          ILO and IHI are set to integers such that on exit */
 /* >          A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. */
 /* >          If JOB = 'N' or 'S', ILO = 1 and IHI = N. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] SCALE */
 /* > \verbatim */
 /* >          SCALE is DOUBLE PRECISION array, dimension (N) */
 /* >          Details of the permutations and scaling factors applied to */
 /* >          A.  If P(j) is the index of the row and column interchanged */
 /* >          with row and column j and D(j) is the scaling factor */
 /* >          applied to row and column j, then */
 /* >          SCALE(j) = P(j)    for j = 1,...,ILO-1 */
 /* >                   = D(j)    for j = ILO,...,IHI */
 /* >                   = P(j)    for j = IHI+1,...,N. */
 /* >          The order in which the interchanges are made is N to IHI+1, */
 /* >          then 1 to ILO-1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date June 2017 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The permutations consist of row and column interchanges which put */
 /* >  the matrix in the form */
 /* > */
 /* >             ( T1   X   Y  ) */
 /* >     P A P = (  0   B   Z  ) */
 /* >             (  0   0   T2 ) */
 /* > */
 /* >  where T1 and T2 are upper triangular matrices whose eigenvalues lie */
 /* >  along the diagonal.  The column indices ILO and IHI mark the starting */
 /* >  and ending columns of the submatrix B. Balancing consists of applying */
 /* >  a diagonal similarity transformation inv(D) * B * D to make the */
 /* >  1-norms of each row of B and its corresponding column nearly equal. */
 /* >  The output matrix is */
 /* > */
 /* >     ( T1     X*D          Y    ) */
 /* >     (  0  inv(D)*B*D  inv(D)*Z ). */
 /* >     (  0      0           T2   ) */
 /* > */
 /* >  Information about the permutations P and the diagonal matrix D is */
 /* >  returned in the vector SCALE. */
 /* > */
 /* >  This subroutine is based on the EISPACK routine BALANC. */
 /* > */
 /* >  Modified by Tzu-Yi Chen, Computer Science Division, University of */
 /* >    California at Berkeley, USA */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgebal_(char *job, integer *n, doublereal *a, integer *
 	lda, integer *ilo, integer *ihi, doublereal *scale, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    doublereal d__1, d__2;
    /* Local variables */
    integer iexc;
    extern doublereal dnrm2_(integer *, doublereal *, integer *);
    doublereal c__, f, g;
    integer i__, j, k, l, m;
    doublereal r__, s;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
 	    integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, 
 	    doublereal *, integer *);
    doublereal sfmin1, sfmin2, sfmax1, sfmax2, ca, ra;
    extern doublereal dlamch_(char *);
    extern integer idamax_(integer *, doublereal *, integer *);
    extern logical disnan_(doublereal *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    logical noconv;
    integer ica, ira;
 /*  -- LAPACK computational routine (version 3.7.1) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2017 */
 /*  ===================================================================== */
 /*     Test the input parameters */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --scale;
    /* Function Body */
    *info = 0;
    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") 
 	    && ! lsame_(job, "B")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEBAL", &i__1, (ftnlen)6);
 	return 0;
    }
    k = 1;
    l = *n;
    if (*n == 0) {
 	goto L210;
    }
    if (lsame_(job, "N")) {
 	i__1 = *n;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    scale[i__] = 1.;
 /* L10: */
 	}
 	goto L210;
    }
    if (lsame_(job, "S")) {
 	goto L120;
    }
 /*     Permutation to isolate eigenvalues if possible */
    goto L50;
 /*     Row and column exchange. */
 L20:
    scale[m] = (doublereal) j;
    if (j == m) {
 	goto L30;
    }
    dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1);
    i__1 = *n - k + 1;
    dswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda);
 L30:
    switch (iexc) {
 	case 1:  goto L40;
 	case 2:  goto L80;
    }
 /*     Search for rows isolating an eigenvalue and push them down. */
 L40:
    if (l == 1) {
 	goto L210;
    }
    --l;
 L50:
    for (j = l; j >= 1; --j) {
 	i__1 = l;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    if (i__ == j) {
 		goto L60;
 	    }
 	    if (a[j + i__ * a_dim1] != 0.) {
 		goto L70;
 	    }
 L60:
 	    ;
 	}
 	m = l;
 	iexc = 1;
 	goto L20;
 L70:
 	;
    }
    goto L90;
 /*     Search for columns isolating an eigenvalue and push them left. */
 L80:
    ++k;
 L90:
    i__1 = l;
    for (j = k; j <= i__1; ++j) {
 	i__2 = l;
 	for (i__ = k; i__ <= i__2; ++i__) {
 	    if (i__ == j) {
 		goto L100;
 	    }
 	    if (a[i__ + j * a_dim1] != 0.) {
 		goto L110;
 	    }
 L100:
 	    ;
 	}
 	m = k;
 	iexc = 2;
 	goto L20;
 L110:
 	;
    }
 L120:
    i__1 = l;
    for (i__ = k; i__ <= i__1; ++i__) {
 	scale[i__] = 1.;
 /* L130: */
    }
    if (lsame_(job, "P")) {
 	goto L210;
    }
 /*     Balance the submatrix in rows K to L. */
 /*     Iterative loop for norm reduction */
    sfmin1 = dlamch_("S") / dlamch_("P");
    sfmax1 = 1. / sfmin1;
    sfmin2 = sfmin1 * 2.;
    sfmax2 = 1. / sfmin2;
 L140:
    noconv = FALSE_;
    i__1 = l;
    for (i__ = k; i__ <= i__1; ++i__) {
 	i__2 = l - k + 1;
 	c__ = dnrm2_(&i__2, &a[k + i__ * a_dim1], &c__1);
 	i__2 = l - k + 1;
 	r__ = dnrm2_(&i__2, &a[i__ + k * a_dim1], lda);
 	ica = idamax_(&l, &a[i__ * a_dim1 + 1], &c__1);
 	ca = (d__1 = a[ica + i__ * a_dim1], abs(d__1));
 	i__2 = *n - k + 1;
 	ira = idamax_(&i__2, &a[i__ + k * a_dim1], lda);
 	ra = (d__1 = a[i__ + (ira + k - 1) * a_dim1], abs(d__1));
 /*        Guard against zero C or R due to underflow. */
 	if (c__ == 0. || r__ == 0.) {
 	    goto L200;
 	}
 	g = r__ / 2.;
 	f = 1.;
 	s = c__ + r__;
 L160:
 /* Computing MAX */
 	d__1 = f2cmax(f,c__);
 /* Computing MIN */
 	d__2 = f2cmin(r__,g);
 	if (c__ >= g || f2cmax(d__1,ca) >= sfmax2 || f2cmin(d__2,ra) <= sfmin2) {
 	    goto L170;
 	}
 	d__1 = c__ + f + ca + r__ + g + ra;
 	if (disnan_(&d__1)) {
 /*           Exit if NaN to avoid infinite loop */
 	    *info = -3;
 	    i__2 = -(*info);
 	    xerbla_("DGEBAL", &i__2, (ftnlen)6);
 	    return 0;
 	}
 	f *= 2.;
 	c__ *= 2.;
 	ca *= 2.;
 	r__ /= 2.;
 	g /= 2.;
 	ra /= 2.;
 	goto L160;
 L170:
 	g = c__ / 2.;
 L180:
 /* Computing MIN */
 	d__1 = f2cmin(f,c__), d__1 = f2cmin(d__1,g);
 	if (g < r__ || f2cmax(r__,ra) >= sfmax2 || f2cmin(d__1,ca) <= sfmin2) {
 	    goto L190;
 	}
 	f /= 2.;
 	c__ /= 2.;
 	g /= 2.;
 	ca /= 2.;
 	r__ *= 2.;
 	ra *= 2.;
 	goto L180;
 /*        Now balance. */
 L190:
 	if (c__ + r__ >= s * .95) {
 	    goto L200;
 	}
 	if (f < 1. && scale[i__] < 1.) {
 	    if (f * scale[i__] <= sfmin1) {
 		goto L200;
 	    }
 	}
 	if (f > 1. && scale[i__] > 1.) {
 	    if (scale[i__] >= sfmax1 / f) {
 		goto L200;
 	    }
 	}
 	g = 1. / f;
 	scale[i__] *= f;
 	noconv = TRUE_;
 	i__2 = *n - k + 1;
 	dscal_(&i__2, &g, &a[i__ + k * a_dim1], lda);
 	dscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1);
 L200:
 	;
    }
    if (noconv) {
 	goto L140;
    }
 L210:
    *ilo = k;
    *ihi = l;
    return 0;
 /*     End of DGEBAL */
 } /* dgebal_ */
--- a/lapack-netlib/SRC/dgebd2.c
+++ b/lapack-netlib/SRC/dgebd2.c
@@ -0,0 +1,745 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 /* > \brief \b DGEBD2 reduces a general matrix to bidiagonal form using an unblocked algorithm. */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEBD2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebd2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebd2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebd2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) */
 /*       INTEGER            INFO, LDA, M, N */
 /*       DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ), */
 /*      $                   TAUQ( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEBD2 reduces a real general m by n matrix A to upper or lower */
 /* > bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */
 /* > */
 /* > If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows in the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns in the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the m by n general matrix to be reduced. */
 /* >          On exit, */
 /* >          if m >= n, the diagonal and the first superdiagonal are */
 /* >            overwritten with the upper bidiagonal matrix B; the */
 /* >            elements below the diagonal, with the array TAUQ, represent */
 /* >            the orthogonal matrix Q as a product of elementary */
 /* >            reflectors, and the elements above the first superdiagonal, */
 /* >            with the array TAUP, represent the orthogonal matrix P as */
 /* >            a product of elementary reflectors; */
 /* >          if m < n, the diagonal and the first subdiagonal are */
 /* >            overwritten with the lower bidiagonal matrix B; the */
 /* >            elements below the first subdiagonal, with the array TAUQ, */
 /* >            represent the orthogonal matrix Q as a product of */
 /* >            elementary reflectors, and the elements above the diagonal, */
 /* >            with the array TAUP, represent the orthogonal matrix P as */
 /* >            a product of elementary reflectors. */
 /* >          See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] D */
 /* > \verbatim */
 /* >          D is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The diagonal elements of the bidiagonal matrix B: */
 /* >          D(i) = A(i,i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] E */
 /* > \verbatim */
 /* >          E is DOUBLE PRECISION array, dimension (f2cmin(M,N)-1) */
 /* >          The off-diagonal elements of the bidiagonal matrix B: */
 /* >          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */
 /* >          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUQ */
 /* > \verbatim */
 /* >          TAUQ is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the orthogonal matrix Q. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUP */
 /* > \verbatim */
 /* >          TAUP is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the orthogonal matrix P. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (f2cmax(M,N)) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit. */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date June 2017 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrices Q and P are represented as products of elementary */
 /* >  reflectors: */
 /* > */
 /* >  If m >= n, */
 /* > */
 /* >     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1) */
 /* > */
 /* >  Each H(i) and G(i) has the form: */
 /* > */
 /* >     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T */
 /* > */
 /* >  where tauq and taup are real scalars, and v and u are real vectors; */
 /* >  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */
 /* >  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */
 /* >  tauq is stored in TAUQ(i) and taup in TAUP(i). */
 /* > */
 /* >  If m < n, */
 /* > */
 /* >     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m) */
 /* > */
 /* >  Each H(i) and G(i) has the form: */
 /* > */
 /* >     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T */
 /* > */
 /* >  where tauq and taup are real scalars, and v and u are real vectors; */
 /* >  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */
 /* >  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */
 /* >  tauq is stored in TAUQ(i) and taup in TAUP(i). */
 /* > */
 /* >  The contents of A on exit are illustrated by the following examples: */
 /* > */
 /* >  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n): */
 /* > */
 /* >    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 ) */
 /* >    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 ) */
 /* >    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 ) */
 /* >    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 ) */
 /* >    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 ) */
 /* >    (  v1  v2  v3  v4  v5 ) */
 /* > */
 /* >  where d and e denote diagonal and off-diagonal elements of B, vi */
 /* >  denotes an element of the vector defining H(i), and ui an element of */
 /* >  the vector defining G(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgebd2_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal *
 	taup, doublereal *work, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    /* Local variables */
    integer i__;
    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
 	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
 	    doublereal *), dlarfg_(integer *, doublereal *, 
 	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen);
 /*  -- LAPACK computational routine (version 3.7.1) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2017 */
 /*  ===================================================================== */
 /*     Test the input parameters */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --d__;
    --e;
    --tauq;
    --taup;
    --work;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info < 0) {
 	i__1 = -(*info);
 	xerbla_("DGEBD2", &i__1, (ftnlen)6);
 	return 0;
    }
    if (*m >= *n) {
 /*        Reduce to upper bidiagonal form */
 	i__1 = *n;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 /*           Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
 	    i__2 = *m - i__ + 1;
 /* Computing MIN */
 	    i__3 = i__ + 1;
 	    dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * 
 		    a_dim1], &c__1, &tauq[i__]);
 	    d__[i__] = a[i__ + i__ * a_dim1];
 	    a[i__ + i__ * a_dim1] = 1.;
 /*           Apply H(i) to A(i:m,i+1:n) from the left */
 	    if (i__ < *n) {
 		i__2 = *m - i__ + 1;
 		i__3 = *n - i__;
 		dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &
 			tauq[i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]
 			);
 	    }
 	    a[i__ + i__ * a_dim1] = d__[i__];
 	    if (i__ < *n) {
 /*              Generate elementary reflector G(i) to annihilate */
 /*              A(i,i+2:n) */
 		i__2 = *n - i__;
 /* Computing MIN */
 		i__3 = i__ + 2;
 		dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + f2cmin(
 			i__3,*n) * a_dim1], lda, &taup[i__]);
 		e[i__] = a[i__ + (i__ + 1) * a_dim1];
 		a[i__ + (i__ + 1) * a_dim1] = 1.;
 /*              Apply G(i) to A(i+1:m,i+1:n) from the right */
 		i__2 = *m - i__;
 		i__3 = *n - i__;
 		dlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], 
 			lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], 
 			lda, &work[1]);
 		a[i__ + (i__ + 1) * a_dim1] = e[i__];
 	    } else {
 		taup[i__] = 0.;
 	    }
 /* L10: */
 	}
    } else {
 /*        Reduce to lower bidiagonal form */
 	i__1 = *m;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 /*           Generate elementary reflector G(i) to annihilate A(i,i+1:n) */
 	    i__2 = *n - i__ + 1;
 /* Computing MIN */
 	    i__3 = i__ + 1;
 	    dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + f2cmin(i__3,*n) * 
 		    a_dim1], lda, &taup[i__]);
 	    d__[i__] = a[i__ + i__ * a_dim1];
 	    a[i__ + i__ * a_dim1] = 1.;
 /*           Apply G(i) to A(i+1:m,i:n) from the right */
 	    if (i__ < *m) {
 		i__2 = *m - i__;
 		i__3 = *n - i__ + 1;
 		dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &
 			taup[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]);
 	    }
 	    a[i__ + i__ * a_dim1] = d__[i__];
 	    if (i__ < *m) {
 /*              Generate elementary reflector H(i) to annihilate */
 /*              A(i+2:m,i) */
 		i__2 = *m - i__;
 /* Computing MIN */
 		i__3 = i__ + 2;
 		dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[f2cmin(i__3,*m) + 
 			i__ * a_dim1], &c__1, &tauq[i__]);
 		e[i__] = a[i__ + 1 + i__ * a_dim1];
 		a[i__ + 1 + i__ * a_dim1] = 1.;
 /*              Apply H(i) to A(i+1:m,i+1:n) from the left */
 		i__2 = *m - i__;
 		i__3 = *n - i__;
 		dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &
 			c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], 
 			lda, &work[1]);
 		a[i__ + 1 + i__ * a_dim1] = e[i__];
 	    } else {
 		tauq[i__] = 0.;
 	    }
 /* L20: */
 	}
    }
    return 0;
 /*     End of DGEBD2 */
 } /* dgebd2_ */
--- a/lapack-netlib/SRC/dgebrd.c
+++ b/lapack-netlib/SRC/dgebrd.c
@@ -0,0 +1,784 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;
 static doublereal c_b21 = -1.;
 static doublereal c_b22 = 1.;
 /* > \brief \b DGEBRD */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEBRD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebrd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebrd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebrd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, */
 /*                          INFO ) */
 /*       INTEGER            INFO, LDA, LWORK, M, N */
 /*       DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ), */
 /*      $                   TAUQ( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEBRD reduces a general real M-by-N matrix A to upper or lower */
 /* > bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */
 /* > */
 /* > If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows in the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns in the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the M-by-N general matrix to be reduced. */
 /* >          On exit, */
 /* >          if m >= n, the diagonal and the first superdiagonal are */
 /* >            overwritten with the upper bidiagonal matrix B; the */
 /* >            elements below the diagonal, with the array TAUQ, represent */
 /* >            the orthogonal matrix Q as a product of elementary */
 /* >            reflectors, and the elements above the first superdiagonal, */
 /* >            with the array TAUP, represent the orthogonal matrix P as */
 /* >            a product of elementary reflectors; */
 /* >          if m < n, the diagonal and the first subdiagonal are */
 /* >            overwritten with the lower bidiagonal matrix B; the */
 /* >            elements below the first subdiagonal, with the array TAUQ, */
 /* >            represent the orthogonal matrix Q as a product of */
 /* >            elementary reflectors, and the elements above the diagonal, */
 /* >            with the array TAUP, represent the orthogonal matrix P as */
 /* >            a product of elementary reflectors. */
 /* >          See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] D */
 /* > \verbatim */
 /* >          D is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The diagonal elements of the bidiagonal matrix B: */
 /* >          D(i) = A(i,i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] E */
 /* > \verbatim */
 /* >          E is DOUBLE PRECISION array, dimension (f2cmin(M,N)-1) */
 /* >          The off-diagonal elements of the bidiagonal matrix B: */
 /* >          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */
 /* >          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUQ */
 /* > \verbatim */
 /* >          TAUQ is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the orthogonal matrix Q. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUP */
 /* > \verbatim */
 /* >          TAUP is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the orthogonal matrix P. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK.  LWORK >= f2cmax(1,M,N). */
 /* >          For optimum performance LWORK >= (M+N)*NB, where NB */
 /* >          is the optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date November 2017 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrices Q and P are represented as products of elementary */
 /* >  reflectors: */
 /* > */
 /* >  If m >= n, */
 /* > */
 /* >     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1) */
 /* > */
 /* >  Each H(i) and G(i) has the form: */
 /* > */
 /* >     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T */
 /* > */
 /* >  where tauq and taup are real scalars, and v and u are real vectors; */
 /* >  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */
 /* >  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */
 /* >  tauq is stored in TAUQ(i) and taup in TAUP(i). */
 /* > */
 /* >  If m < n, */
 /* > */
 /* >     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m) */
 /* > */
 /* >  Each H(i) and G(i) has the form: */
 /* > */
 /* >     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T */
 /* > */
 /* >  where tauq and taup are real scalars, and v and u are real vectors; */
 /* >  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */
 /* >  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */
 /* >  tauq is stored in TAUQ(i) and taup in TAUP(i). */
 /* > */
 /* >  The contents of A on exit are illustrated by the following examples: */
 /* > */
 /* >  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n): */
 /* > */
 /* >    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 ) */
 /* >    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 ) */
 /* >    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 ) */
 /* >    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 ) */
 /* >    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 ) */
 /* >    (  v1  v2  v3  v4  v5 ) */
 /* > */
 /* >  where d and e denote diagonal and off-diagonal elements of B, vi */
 /* >  denotes an element of the vector defining H(i), and ui an element of */
 /* >  the vector defining G(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgebrd_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal *
 	taup, doublereal *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    integer i__, j;
    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
 	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
 	    integer *, doublereal *, doublereal *, integer *);
    integer nbmin, iinfo, minmn;
    extern /* Subroutine */ int dgebd2_(integer *, integer *, doublereal *, 
 	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
 	     doublereal *, integer *);
    integer nb;
    extern /* Subroutine */ int dlabrd_(integer *, integer *, integer *, 
 	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
 	     doublereal *, doublereal *, integer *, doublereal *, integer *);
    integer nx, ws;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer ldwrkx, ldwrky, lwkopt;
    logical lquery;
 /*  -- LAPACK computational routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */
 /*  ===================================================================== */
 /*     Test the input parameters */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --d__;
    --e;
    --tauq;
    --taup;
    --work;
    /* Function Body */
    *info = 0;
 /* Computing MAX */
    i__1 = 1, i__2 = ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1, (
 	    ftnlen)6, (ftnlen)1);
    nb = f2cmax(i__1,i__2);
    lwkopt = (*m + *n) * nb;
    work[1] = (doublereal) lwkopt;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = f2cmax(1,*m);
 	if (*lwork < f2cmax(i__1,*n) && ! lquery) {
 	    *info = -10;
 	}
    }
    if (*info < 0) {
 	i__1 = -(*info);
 	xerbla_("DGEBRD", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }
 /*     Quick return if possible */
    minmn = f2cmin(*m,*n);
    if (minmn == 0) {
 	work[1] = 1.;
 	return 0;
    }
    ws = f2cmax(*m,*n);
    ldwrkx = *m;
    ldwrky = *n;
    if (nb > 1 && nb < minmn) {
 /*        Set the crossover point NX. */
 /* Computing MAX */
 	i__1 = nb, i__2 = ilaenv_(&c__3, "DGEBRD", " ", m, n, &c_n1, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);
 /*        Determine when to switch from blocked to unblocked code. */
 	if (nx < minmn) {
 	    ws = (*m + *n) * nb;
 	    if (*lwork < ws) {
 /*              Not enough work space for the optimal NB, consider using */
 /*              a smaller block size. */
 		nbmin = ilaenv_(&c__2, "DGEBRD", " ", m, n, &c_n1, &c_n1, (
 			ftnlen)6, (ftnlen)1);
 		if (*lwork >= (*m + *n) * nbmin) {
 		    nb = *lwork / (*m + *n);
 		} else {
 		    nb = 1;
 		    nx = minmn;
 		}
 	    }
 	}
    } else {
 	nx = minmn;
    }
    i__1 = minmn - nx;
    i__2 = nb;
    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
 /*        Reduce rows and columns i:i+nb-1 to bidiagonal form and return */
 /*        the matrices X and Y which are needed to update the unreduced */
 /*        part of the matrix */
 	i__3 = *m - i__ + 1;
 	i__4 = *n - i__ + 1;
 	dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[
 		i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx 
 		* nb + 1], &ldwrky);
 /*        Update the trailing submatrix A(i+nb:m,i+nb:n), using an update */
 /*        of the form  A := A - V*Y**T - X*U**T */
 	i__3 = *m - i__ - nb + 1;
 	i__4 = *n - i__ - nb + 1;
 	dgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b21, &a[i__ 
 		+ nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], &
 		ldwrky, &c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda);
 	i__3 = *m - i__ - nb + 1;
 	i__4 = *n - i__ - nb + 1;
 	dgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b21, &
 		work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, &
 		c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda);
 /*        Copy diagonal and off-diagonal elements of B back into A */
 	if (*m >= *n) {
 	    i__3 = i__ + nb - 1;
 	    for (j = i__; j <= i__3; ++j) {
 		a[j + j * a_dim1] = d__[j];
 		a[j + (j + 1) * a_dim1] = e[j];
 /* L10: */
 	    }
 	} else {
 	    i__3 = i__ + nb - 1;
 	    for (j = i__; j <= i__3; ++j) {
 		a[j + j * a_dim1] = d__[j];
 		a[j + 1 + j * a_dim1] = e[j];
 /* L20: */
 	    }
 	}
 /* L30: */
    }
 /*     Use unblocked code to reduce the remainder of the matrix */
    i__2 = *m - i__ + 1;
    i__1 = *n - i__ + 1;
    dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &
 	    tauq[i__], &taup[i__], &work[1], &iinfo);
    work[1] = (doublereal) ws;
    return 0;
 /*     End of DGEBRD */
 } /* dgebrd_ */
--- a/lapack-netlib/SRC/dgecon.c
+++ b/lapack-netlib/SRC/dgecon.c
@@ -0,0 +1,658 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 /* > \brief \b DGECON */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGECON + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgecon.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgecon.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgecon.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, */
 /*                          INFO ) */
 /*       CHARACTER          NORM */
 /*       INTEGER            INFO, LDA, N */
 /*       DOUBLE PRECISION   ANORM, RCOND */
 /*       INTEGER            IWORK( * ) */
 /*       DOUBLE PRECISION   A( LDA, * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGECON estimates the reciprocal of the condition number of a general */
 /* > real matrix A, in either the 1-norm or the infinity-norm, using */
 /* > the LU factorization computed by DGETRF. */
 /* > */
 /* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */
 /* > condition number is computed as */
 /* >    RCOND = 1 / ( norm(A) * norm(inv(A)) ). */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] NORM */
 /* > \verbatim */
 /* >          NORM is CHARACTER*1 */
 /* >          Specifies whether the 1-norm condition number or the */
 /* >          infinity-norm condition number is required: */
 /* >          = '1' or 'O':  1-norm; */
 /* >          = 'I':         Infinity-norm. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          The factors L and U from the factorization A = P*L*U */
 /* >          as computed by DGETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ANORM */
 /* > \verbatim */
 /* >          ANORM is DOUBLE PRECISION */
 /* >          If NORM = '1' or 'O', the 1-norm of the original matrix A. */
 /* >          If NORM = 'I', the infinity-norm of the original matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is DOUBLE PRECISION */
 /* >          The reciprocal of the condition number of the matrix A, */
 /* >          computed as RCOND = 1/(norm(A) * norm(inv(A))). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (4*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgecon_(char *norm, integer *n, doublereal *a, integer *
 	lda, doublereal *anorm, doublereal *rcond, doublereal *work, integer *
 	iwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1;
    doublereal d__1;
    /* Local variables */
    integer kase, kase1;
    doublereal scale;
    extern logical lsame_(char *, char *);
    integer isave[3];
    extern /* Subroutine */ int drscl_(integer *, doublereal *, doublereal *, 
 	    integer *), dlacn2_(integer *, doublereal *, doublereal *, 
 	    integer *, doublereal *, integer *, integer *);
    extern doublereal dlamch_(char *);
    doublereal sl;
    integer ix;
    extern integer idamax_(integer *, doublereal *, integer *);
    doublereal su;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    doublereal ainvnm;
    extern /* Subroutine */ int dlatrs_(char *, char *, char *, char *, 
 	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
 	    doublereal *, integer *);
    logical onenrm;
    char normin[1];
    doublereal smlnum;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters. */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --work;
    --iwork;
    /* Function Body */
    *info = 0;
    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
    if (! onenrm && ! lsame_(norm, "I")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    } else if (*anorm < 0.) {
 	*info = -5;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGECON", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Quick return if possible */
    *rcond = 0.;
    if (*n == 0) {
 	*rcond = 1.;
 	return 0;
    } else if (*anorm == 0.) {
 	return 0;
    }
    smlnum = dlamch_("Safe minimum");
 /*     Estimate the norm of inv(A). */
    ainvnm = 0.;
    *(unsigned char *)normin = 'N';
    if (onenrm) {
 	kase1 = 1;
    } else {
 	kase1 = 2;
    }
    kase = 0;
 L10:
    dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave);
    if (kase != 0) {
 	if (kase == kase1) {
 /*           Multiply by inv(L). */
 	    dlatrs_("Lower", "No transpose", "Unit", normin, n, &a[a_offset], 
 		    lda, &work[1], &sl, &work[(*n << 1) + 1], info);
 /*           Multiply by inv(U). */
 	    dlatrs_("Upper", "No transpose", "Non-unit", normin, n, &a[
 		    a_offset], lda, &work[1], &su, &work[*n * 3 + 1], info);
 	} else {
 /*           Multiply by inv(U**T). */
 	    dlatrs_("Upper", "Transpose", "Non-unit", normin, n, &a[a_offset],
 		     lda, &work[1], &su, &work[*n * 3 + 1], info);
 /*           Multiply by inv(L**T). */
 	    dlatrs_("Lower", "Transpose", "Unit", normin, n, &a[a_offset], 
 		    lda, &work[1], &sl, &work[(*n << 1) + 1], info);
 	}
 /*        Divide X by 1/(SL*SU) if doing so will not cause overflow. */
 	scale = sl * su;
 	*(unsigned char *)normin = 'Y';
 	if (scale != 1.) {
 	    ix = idamax_(n, &work[1], &c__1);
 	    if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) 
 		    {
 		goto L20;
 	    }
 	    drscl_(n, &scale, &work[1], &c__1);
 	}
 	goto L10;
    }
 /*     Compute the estimate of the reciprocal condition number. */
    if (ainvnm != 0.) {
 	*rcond = 1. / ainvnm / *anorm;
    }
 L20:
    return 0;
 /*     End of DGECON */
 } /* dgecon_ */
--- a/lapack-netlib/SRC/dgeequ.c
+++ b/lapack-netlib/SRC/dgeequ.c
@@ -0,0 +1,733 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b DGEEQU */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEEQU + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeequ.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeequ.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeequ.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, */
 /*                          INFO ) */
 /*       INTEGER            INFO, LDA, M, N */
 /*       DOUBLE PRECISION   AMAX, COLCND, ROWCND */
 /*       DOUBLE PRECISION   A( LDA, * ), C( * ), R( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEEQU computes row and column scalings intended to equilibrate an */
 /* > M-by-N matrix A and reduce its condition number.  R returns the row */
 /* > scale factors and C the column scale factors, chosen to try to make */
 /* > the largest element in each row and column of the matrix B with */
 /* > elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. */
 /* > */
 /* > R(i) and C(j) are restricted to be between SMLNUM = smallest safe */
 /* > number and BIGNUM = largest safe number.  Use of these scaling */
 /* > factors is not guaranteed to reduce the condition number of A but */
 /* > works well in practice. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          The M-by-N matrix whose equilibration factors are */
 /* >          to be computed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] R */
 /* > \verbatim */
 /* >          R is DOUBLE PRECISION array, dimension (M) */
 /* >          If INFO = 0 or INFO > M, R contains the row scale factors */
 /* >          for A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] C */
 /* > \verbatim */
 /* >          C is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0,  C contains the column scale factors for A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] ROWCND */
 /* > \verbatim */
 /* >          ROWCND is DOUBLE PRECISION */
 /* >          If INFO = 0 or INFO > M, ROWCND contains the ratio of the */
 /* >          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and */
 /* >          AMAX is neither too large nor too small, it is not worth */
 /* >          scaling by R. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] COLCND */
 /* > \verbatim */
 /* >          COLCND is DOUBLE PRECISION */
 /* >          If INFO = 0, COLCND contains the ratio of the smallest */
 /* >          C(i) to the largest C(i).  If COLCND >= 0.1, it is not */
 /* >          worth scaling by C. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] AMAX */
 /* > \verbatim */
 /* >          AMAX is DOUBLE PRECISION */
 /* >          Absolute value of largest matrix element.  If AMAX is very */
 /* >          close to overflow or very close to underflow, the matrix */
 /* >          should be scaled. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i,  and i is */
 /* >                <= M:  the i-th row of A is exactly zero */
 /* >                >  M:  the (i-M)-th column of A is exactly zero */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgeequ_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal 
 	*colcnd, doublereal *amax, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    doublereal d__1, d__2, d__3;
    /* Local variables */
    integer i__, j;
    doublereal rcmin, rcmax;
    extern doublereal dlamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    doublereal bignum, smlnum;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters. */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --r__;
    --c__;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEEQU", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Quick return if possible */
    if (*m == 0 || *n == 0) {
 	*rowcnd = 1.;
 	*colcnd = 1.;
 	*amax = 0.;
 	return 0;
    }
 /*     Get machine constants. */
    smlnum = dlamch_("S");
    bignum = 1. / smlnum;
 /*     Compute row scale factors. */
    i__1 = *m;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	r__[i__] = 0.;
 /* L10: */
    }
 /*     Find the maximum element in each row. */
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 	i__2 = *m;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 /* Computing MAX */
 	    d__2 = r__[i__], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1));
 	    r__[i__] = f2cmax(d__2,d__3);
 /* L20: */
 	}
 /* L30: */
    }
 /*     Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    i__1 = *m;
    for (i__ = 1; i__ <= i__1; ++i__) {
 /* Computing MAX */
 	d__1 = rcmax, d__2 = r__[i__];
 	rcmax = f2cmax(d__1,d__2);
 /* Computing MIN */
 	d__1 = rcmin, d__2 = r__[i__];
 	rcmin = f2cmin(d__1,d__2);
 /* L40: */
    }
    *amax = rcmax;
    if (rcmin == 0.) {
 /*        Find the first zero scale factor and return an error code. */
 	i__1 = *m;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    if (r__[i__] == 0.) {
 		*info = i__;
 		return 0;
 	    }
 /* L50: */
 	}
    } else {
 /*        Invert the scale factors. */
 	i__1 = *m;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 /* Computing MIN */
 /* Computing MAX */
 	    d__2 = r__[i__];
 	    d__1 = f2cmax(d__2,smlnum);
 	    r__[i__] = 1. / f2cmin(d__1,bignum);
 /* L60: */
 	}
 /*        Compute ROWCND = f2cmin(R(I)) / f2cmax(R(I)) */
 	*rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
    }
 /*     Compute column scale factors */
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 	c__[j] = 0.;
 /* L70: */
    }
 /*     Find the maximum element in each column, */
 /*     assuming the row scaling computed above. */
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 	i__2 = *m;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 /* Computing MAX */
 	    d__2 = c__[j], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)) * 
 		    r__[i__];
 	    c__[j] = f2cmax(d__2,d__3);
 /* L80: */
 	}
 /* L90: */
    }
 /*     Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 /* Computing MIN */
 	d__1 = rcmin, d__2 = c__[j];
 	rcmin = f2cmin(d__1,d__2);
 /* Computing MAX */
 	d__1 = rcmax, d__2 = c__[j];
 	rcmax = f2cmax(d__1,d__2);
 /* L100: */
    }
    if (rcmin == 0.) {
 /*        Find the first zero scale factor and return an error code. */
 	i__1 = *n;
 	for (j = 1; j <= i__1; ++j) {
 	    if (c__[j] == 0.) {
 		*info = *m + j;
 		return 0;
 	    }
 /* L110: */
 	}
    } else {
 /*        Invert the scale factors. */
 	i__1 = *n;
 	for (j = 1; j <= i__1; ++j) {
 /* Computing MIN */
 /* Computing MAX */
 	    d__2 = c__[j];
 	    d__1 = f2cmax(d__2,smlnum);
 	    c__[j] = 1. / f2cmin(d__1,bignum);
 /* L120: */
 	}
 /*        Compute COLCND = f2cmin(C(J)) / f2cmax(C(J)) */
 	*colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
    }
    return 0;
 /*     End of DGEEQU */
 } /* dgeequ_ */
--- a/lapack-netlib/SRC/dgeequb.c
+++ b/lapack-netlib/SRC/dgeequb.c
@@ -0,0 +1,753 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b DGEEQUB */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEEQUB + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeequb
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeequb
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeequb
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEEQUB( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, */
 /*                           INFO ) */
 /*       INTEGER            INFO, LDA, M, N */
 /*       DOUBLE PRECISION   AMAX, COLCND, ROWCND */
 /*       DOUBLE PRECISION   A( LDA, * ), C( * ), R( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEEQUB computes row and column scalings intended to equilibrate an */
 /* > M-by-N matrix A and reduce its condition number.  R returns the row */
 /* > scale factors and C the column scale factors, chosen to try to make */
 /* > the largest element in each row and column of the matrix B with */
 /* > elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most */
 /* > the radix. */
 /* > */
 /* > R(i) and C(j) are restricted to be a power of the radix between */
 /* > SMLNUM = smallest safe number and BIGNUM = largest safe number.  Use */
 /* > of these scaling factors is not guaranteed to reduce the condition */
 /* > number of A but works well in practice. */
 /* > */
 /* > This routine differs from DGEEQU by restricting the scaling factors */
 /* > to a power of the radix.  Barring over- and underflow, scaling by */
 /* > these factors introduces no additional rounding errors.  However, the */
 /* > scaled entries' magnitudes are no longer approximately 1 but lie */
 /* > between sqrt(radix) and 1/sqrt(radix). */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          The M-by-N matrix whose equilibration factors are */
 /* >          to be computed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] R */
 /* > \verbatim */
 /* >          R is DOUBLE PRECISION array, dimension (M) */
 /* >          If INFO = 0 or INFO > M, R contains the row scale factors */
 /* >          for A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] C */
 /* > \verbatim */
 /* >          C is DOUBLE PRECISION array, dimension (N) */
 /* >          If INFO = 0,  C contains the column scale factors for A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] ROWCND */
 /* > \verbatim */
 /* >          ROWCND is DOUBLE PRECISION */
 /* >          If INFO = 0 or INFO > M, ROWCND contains the ratio of the */
 /* >          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and */
 /* >          AMAX is neither too large nor too small, it is not worth */
 /* >          scaling by R. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] COLCND */
 /* > \verbatim */
 /* >          COLCND is DOUBLE PRECISION */
 /* >          If INFO = 0, COLCND contains the ratio of the smallest */
 /* >          C(i) to the largest C(i).  If COLCND >= 0.1, it is not */
 /* >          worth scaling by C. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] AMAX */
 /* > \verbatim */
 /* >          AMAX is DOUBLE PRECISION */
 /* >          Absolute value of largest matrix element.  If AMAX is very */
 /* >          close to overflow or very close to underflow, the matrix */
 /* >          should be scaled. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i,  and i is */
 /* >                <= M:  the i-th row of A is exactly zero */
 /* >                >  M:  the (i-M)-th column of A is exactly zero */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgeequb_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal 
 	*colcnd, doublereal *amax, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    doublereal d__1, d__2, d__3;
    /* Local variables */
    integer i__, j;
    doublereal radix, rcmin, rcmax;
    extern doublereal dlamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    doublereal bignum, logrdx, smlnum;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters. */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --r__;
    --c__;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEEQUB", &i__1, (ftnlen)7);
 	return 0;
    }
 /*     Quick return if possible. */
    if (*m == 0 || *n == 0) {
 	*rowcnd = 1.;
 	*colcnd = 1.;
 	*amax = 0.;
 	return 0;
    }
 /*     Get machine constants.  Assume SMLNUM is a power of the radix. */
    smlnum = dlamch_("S");
    bignum = 1. / smlnum;
    radix = dlamch_("B");
    logrdx = log(radix);
 /*     Compute row scale factors. */
    i__1 = *m;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	r__[i__] = 0.;
 /* L10: */
    }
 /*     Find the maximum element in each row. */
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 	i__2 = *m;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 /* Computing MAX */
 	    d__2 = r__[i__], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1));
 	    r__[i__] = f2cmax(d__2,d__3);
 /* L20: */
 	}
 /* L30: */
    }
    i__1 = *m;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	if (r__[i__] > 0.) {
 	    i__2 = (integer) (log(r__[i__]) / logrdx);
 	    r__[i__] = pow_di(&radix, &i__2);
 	}
    }
 /*     Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    i__1 = *m;
    for (i__ = 1; i__ <= i__1; ++i__) {
 /* Computing MAX */
 	d__1 = rcmax, d__2 = r__[i__];
 	rcmax = f2cmax(d__1,d__2);
 /* Computing MIN */
 	d__1 = rcmin, d__2 = r__[i__];
 	rcmin = f2cmin(d__1,d__2);
 /* L40: */
    }
    *amax = rcmax;
    if (rcmin == 0.) {
 /*        Find the first zero scale factor and return an error code. */
 	i__1 = *m;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    if (r__[i__] == 0.) {
 		*info = i__;
 		return 0;
 	    }
 /* L50: */
 	}
    } else {
 /*        Invert the scale factors. */
 	i__1 = *m;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 /* Computing MIN */
 /* Computing MAX */
 	    d__2 = r__[i__];
 	    d__1 = f2cmax(d__2,smlnum);
 	    r__[i__] = 1. / f2cmin(d__1,bignum);
 /* L60: */
 	}
 /*        Compute ROWCND = f2cmin(R(I)) / f2cmax(R(I)). */
 	*rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
    }
 /*     Compute column scale factors */
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 	c__[j] = 0.;
 /* L70: */
    }
 /*     Find the maximum element in each column, */
 /*     assuming the row scaling computed above. */
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 	i__2 = *m;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 /* Computing MAX */
 	    d__2 = c__[j], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)) * 
 		    r__[i__];
 	    c__[j] = f2cmax(d__2,d__3);
 /* L80: */
 	}
 	if (c__[j] > 0.) {
 	    i__2 = (integer) (log(c__[j]) / logrdx);
 	    c__[j] = pow_di(&radix, &i__2);
 	}
 /* L90: */
    }
 /*     Find the maximum and minimum scale factors. */
    rcmin = bignum;
    rcmax = 0.;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 /* Computing MIN */
 	d__1 = rcmin, d__2 = c__[j];
 	rcmin = f2cmin(d__1,d__2);
 /* Computing MAX */
 	d__1 = rcmax, d__2 = c__[j];
 	rcmax = f2cmax(d__1,d__2);
 /* L100: */
    }
    if (rcmin == 0.) {
 /*        Find the first zero scale factor and return an error code. */
 	i__1 = *n;
 	for (j = 1; j <= i__1; ++j) {
 	    if (c__[j] == 0.) {
 		*info = *m + j;
 		return 0;
 	    }
 /* L110: */
 	}
    } else {
 /*        Invert the scale factors. */
 	i__1 = *n;
 	for (j = 1; j <= i__1; ++j) {
 /* Computing MIN */
 /* Computing MAX */
 	    d__2 = c__[j];
 	    d__1 = f2cmax(d__2,smlnum);
 	    c__[j] = 1. / f2cmin(d__1,bignum);
 /* L120: */
 	}
 /*        Compute COLCND = f2cmin(C(J)) / f2cmax(C(J)). */
 	*colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
    }
    return 0;
 /*     End of DGEEQUB */
 } /* dgeequb_ */
--- a/lapack-netlib/SRC/dgees.c
+++ b/lapack-netlib/SRC/dgees.c
--- a/lapack-netlib/SRC/dgeesx.c
+++ b/lapack-netlib/SRC/dgeesx.c
--- a/lapack-netlib/SRC/dgeev.c
+++ b/lapack-netlib/SRC/dgeev.c
--- a/lapack-netlib/SRC/dgeevx.c
+++ b/lapack-netlib/SRC/dgeevx.c
--- a/lapack-netlib/SRC/dgehd2.c
+++ b/lapack-netlib/SRC/dgehd2.c
@@ -0,0 +1,628 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 /* > \brief \b DGEHD2 reduces a general square matrix to upper Hessenberg form using an unblocked algorithm. 
 */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEHD2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgehd2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgehd2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgehd2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) */
 /*       INTEGER            IHI, ILO, INFO, LDA, N */
 /*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEHD2 reduces a real general matrix A to upper Hessenberg form H by */
 /* > an orthogonal similarity transformation:  Q**T * A * Q = H . */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ILO */
 /* > \verbatim */
 /* >          ILO is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IHI */
 /* > \verbatim */
 /* >          IHI is INTEGER */
 /* > */
 /* >          It is assumed that A is already upper triangular in rows */
 /* >          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */
 /* >          set by a previous call to DGEBAL; otherwise they should be */
 /* >          set to 1 and N respectively. See Further Details. */
 /* >          1 <= ILO <= IHI <= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the n by n general matrix to be reduced. */
 /* >          On exit, the upper triangle and the first subdiagonal of A */
 /* >          are overwritten with the upper Hessenberg matrix H, and the */
 /* >          elements below the first subdiagonal, with the array TAU, */
 /* >          represent the orthogonal matrix Q as a product of elementary */
 /* >          reflectors. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is DOUBLE PRECISION array, dimension (N-1) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of (ihi-ilo) elementary */
 /* >  reflectors */
 /* > */
 /* >     Q = H(ilo) H(ilo+1) . . . H(ihi-1). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */
 /* >  exit in A(i+2:ihi,i), and tau in TAU(i). */
 /* > */
 /* >  The contents of A are illustrated by the following example, with */
 /* >  n = 7, ilo = 2 and ihi = 6: */
 /* > */
 /* >  on entry,                        on exit, */
 /* > */
 /* >  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a ) */
 /* >  (     a   a   a   a   a   a )    (      a   h   h   h   h   a ) */
 /* >  (     a   a   a   a   a   a )    (      h   h   h   h   h   h ) */
 /* >  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h ) */
 /* >  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h ) */
 /* >  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h ) */
 /* >  (                         a )    (                          a ) */
 /* > */
 /* >  where a denotes an element of the original matrix A, h denotes a */
 /* >  modified element of the upper Hessenberg matrix H, and vi denotes an */
 /* >  element of the vector defining H(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgehd2_(integer *n, integer *ilo, integer *ihi, 
 	doublereal *a, integer *lda, doublereal *tau, doublereal *work, 
 	integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    /* Local variables */
    integer i__;
    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
 	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
 	    doublereal *), dlarfg_(integer *, doublereal *, 
 	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen);
    doublereal aii;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;
    /* Function Body */
    *info = 0;
    if (*n < 0) {
 	*info = -1;
    } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) {
 	*info = -2;
    } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEHD2", &i__1, (ftnlen)6);
 	return 0;
    }
    i__1 = *ihi - 1;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
 /*        Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */
 	i__2 = *ihi - i__;
 /* Computing MIN */
 	i__3 = i__ + 2;
 	dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[f2cmin(i__3,*n) + i__ * 
 		a_dim1], &c__1, &tau[i__]);
 	aii = a[i__ + 1 + i__ * a_dim1];
 	a[i__ + 1 + i__ * a_dim1] = 1.;
 /*        Apply H(i) to A(1:ihi,i+1:ihi) from the right */
 	i__2 = *ihi - i__;
 	dlarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
 		i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]);
 /*        Apply H(i) to A(i+1:ihi,i+1:n) from the left */
 	i__2 = *ihi - i__;
 	i__3 = *n - i__;
 	dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
 		i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]);
 	a[i__ + 1 + i__ * a_dim1] = aii;
 /* L10: */
    }
    return 0;
 /*     End of DGEHD2 */
 } /* dgehd2_ */
--- a/lapack-netlib/SRC/dgehrd.c
+++ b/lapack-netlib/SRC/dgehrd.c
@@ -0,0 +1,789 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;
 static integer c__65 = 65;
 static doublereal c_b25 = -1.;
 static doublereal c_b26 = 1.;
 /* > \brief \b DGEHRD */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEHRD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgehrd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgehrd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgehrd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) */
 /*       INTEGER            IHI, ILO, INFO, LDA, LWORK, N */
 /*       DOUBLE PRECISION  A( LDA, * ), TAU( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEHRD reduces a real general matrix A to upper Hessenberg form H by */
 /* > an orthogonal similarity transformation:  Q**T * A * Q = H . */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ILO */
 /* > \verbatim */
 /* >          ILO is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IHI */
 /* > \verbatim */
 /* >          IHI is INTEGER */
 /* > */
 /* >          It is assumed that A is already upper triangular in rows */
 /* >          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */
 /* >          set by a previous call to DGEBAL; otherwise they should be */
 /* >          set to 1 and N respectively. See Further Details. */
 /* >          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the N-by-N general matrix to be reduced. */
 /* >          On exit, the upper triangle and the first subdiagonal of A */
 /* >          are overwritten with the upper Hessenberg matrix H, and the */
 /* >          elements below the first subdiagonal, with the array TAU, */
 /* >          represent the orthogonal matrix Q as a product of elementary */
 /* >          reflectors. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is DOUBLE PRECISION array, dimension (N-1) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to */
 /* >          zero. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (LWORK) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK.  LWORK >= f2cmax(1,N). */
 /* >          For good performance, LWORK should generally be larger. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of (ihi-ilo) elementary */
 /* >  reflectors */
 /* > */
 /* >     Q = H(ilo) H(ilo+1) . . . H(ihi-1). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */
 /* >  exit in A(i+2:ihi,i), and tau in TAU(i). */
 /* > */
 /* >  The contents of A are illustrated by the following example, with */
 /* >  n = 7, ilo = 2 and ihi = 6: */
 /* > */
 /* >  on entry,                        on exit, */
 /* > */
 /* >  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a ) */
 /* >  (     a   a   a   a   a   a )    (      a   h   h   h   h   a ) */
 /* >  (     a   a   a   a   a   a )    (      h   h   h   h   h   h ) */
 /* >  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h ) */
 /* >  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h ) */
 /* >  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h ) */
 /* >  (                         a )    (                          a ) */
 /* > */
 /* >  where a denotes an element of the original matrix A, h denotes a */
 /* >  modified element of the upper Hessenberg matrix H, and vi denotes an */
 /* >  element of the vector defining H(i). */
 /* > */
 /* >  This file is a slight modification of LAPACK-3.0's DGEHRD */
 /* >  subroutine incorporating improvements proposed by Quintana-Orti and */
 /* >  Van de Geijn (2006). (See DLAHR2.) */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgehrd_(integer *n, integer *ilo, integer *ihi, 
 	doublereal *a, integer *lda, doublereal *tau, doublereal *work, 
 	integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    integer i__, j;
    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
 	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
 	    integer *, doublereal *, doublereal *, integer *);
    integer nbmin, iinfo;
    extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, 
 	    integer *, integer *, doublereal *, doublereal *, integer *, 
 	    doublereal *, integer *), daxpy_(
 	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
 	    integer *), dgehd2_(integer *, integer *, integer *, doublereal *,
 	     integer *, doublereal *, doublereal *, integer *), dlahr2_(
 	    integer *, integer *, integer *, doublereal *, integer *, 
 	    doublereal *, doublereal *, integer *, doublereal *, integer *);
    integer ib;
    doublereal ei;
    integer nb, nh;
    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *);
    integer nx;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer ldwork, lwkopt;
    logical lquery;
    integer iwt;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;
    /* Function Body */
    *info = 0;
    lquery = *lwork == -1;
    if (*n < 0) {
 	*info = -1;
    } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) {
 	*info = -2;
    } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
 	*info = -8;
    }
    if (*info == 0) {
 /*        Compute the workspace requirements */
 /* Computing MIN */
 	i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nb = f2cmin(i__1,i__2);
 	lwkopt = *n * nb + 4160;
 	work[1] = (doublereal) lwkopt;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEHRD", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }
 /*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */
    i__1 = *ilo - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	tau[i__] = 0.;
 /* L10: */
    }
    i__1 = *n - 1;
    for (i__ = f2cmax(1,*ihi); i__ <= i__1; ++i__) {
 	tau[i__] = 0.;
 /* L20: */
    }
 /*     Quick return if possible */
    nh = *ihi - *ilo + 1;
    if (nh <= 1) {
 	work[1] = 1.;
 	return 0;
    }
 /*     Determine the block size */
 /* Computing MIN */
    i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, (
 	    ftnlen)6, (ftnlen)1);
    nb = f2cmin(i__1,i__2);
    nbmin = 2;
    if (nb > 1 && nb < nh) {
 /*        Determine when to cross over from blocked to unblocked code */
 /*        (last block is always handled by unblocked code) */
 /* Computing MAX */
 	i__1 = nb, i__2 = ilaenv_(&c__3, "DGEHRD", " ", n, ilo, ihi, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);
 	if (nx < nh) {
 /*           Determine if workspace is large enough for blocked code */
 	    if (*lwork < *n * nb + 4160) {
 /*              Not enough workspace to use optimal NB:  determine the */
 /*              minimum value of NB, and reduce NB or force use of */
 /*              unblocked code */
 /* Computing MAX */
 		i__1 = 2, i__2 = ilaenv_(&c__2, "DGEHRD", " ", n, ilo, ihi, &
 			c_n1, (ftnlen)6, (ftnlen)1);
 		nbmin = f2cmax(i__1,i__2);
 		if (*lwork >= *n * nbmin + 4160) {
 		    nb = (*lwork - 4160) / *n;
 		} else {
 		    nb = 1;
 		}
 	    }
 	}
    }
    ldwork = *n;
    if (nb < nbmin || nb >= nh) {
 /*        Use unblocked code below */
 	i__ = *ilo;
    } else {
 /*        Use blocked code */
 	iwt = *n * nb + 1;
 	i__1 = *ihi - 1 - nx;
 	i__2 = nb;
 	for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
 /* Computing MIN */
 	    i__3 = nb, i__4 = *ihi - i__;
 	    ib = f2cmin(i__3,i__4);
 /*           Reduce columns i:i+ib-1 to Hessenberg form, returning the */
 /*           matrices V and T of the block reflector H = I - V*T*V**T */
 /*           which performs the reduction, and also the matrix Y = A*V*T */
 	    dlahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], &
 		    work[iwt], &c__65, &work[1], &ldwork);
 /*           Apply the block reflector H to A(1:ihi,i+ib:ihi) from the */
 /*           right, computing  A := A - Y * V**T. V(i+ib,ib-1) must be set */
 /*           to 1 */
 	    ei = a[i__ + ib + (i__ + ib - 1) * a_dim1];
 	    a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.;
 	    i__3 = *ihi - i__ - ib + 1;
 	    dgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b25, &
 		    work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &
 		    c_b26, &a[(i__ + ib) * a_dim1 + 1], lda);
 	    a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei;
 /*           Apply the block reflector H to A(1:i,i+1:i+ib-1) from the */
 /*           right */
 	    i__3 = ib - 1;
 	    dtrmm_("Right", "Lower", "Transpose", "Unit", &i__, &i__3, &c_b26,
 		     &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork);
 	    i__3 = ib - 2;
 	    for (j = 0; j <= i__3; ++j) {
 		daxpy_(&i__, &c_b25, &work[ldwork * j + 1], &c__1, &a[(i__ + 
 			j + 1) * a_dim1 + 1], &c__1);
 /* L30: */
 	    }
 /*           Apply the block reflector H to A(i+1:ihi,i+ib:n) from the */
 /*           left */
 	    i__3 = *ihi - i__;
 	    i__4 = *n - i__ - ib + 1;
 	    dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
 		    i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, &work[iwt], &
 		    c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &
 		    ldwork);
 /* L40: */
 	}
    }
 /*     Use unblocked code to reduce the rest of the matrix */
    dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
    work[1] = (doublereal) lwkopt;
    return 0;
 /*     End of DGEHRD */
 } /* dgehrd_ */
--- a/lapack-netlib/SRC/dgejsv.c
+++ b/lapack-netlib/SRC/dgejsv.c
--- a/lapack-netlib/SRC/dgelq.c
+++ b/lapack-netlib/SRC/dgelq.c
@@ -0,0 +1,745 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__2 = 2;
 /* > \brief \b DGELQ */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK, */
 /*                         INFO ) */
 /*       INTEGER           INFO, LDA, M, N, TSIZE, LWORK */
 /*       DOUBLE PRECISION  A( LDA, * ), T( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGELQ computes an LQ factorization of a real M-by-N matrix A: */
 /* > */
 /* >    A = ( L 0 ) *  Q */
 /* > */
 /* > where: */
 /* > */
 /* >    Q is a N-by-N orthogonal matrix; */
 /* >    L is a lower-triangular M-by-M matrix; */
 /* >    0 is a M-by-(N-M) zero matrix, if M < N. */
 /* > */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and below the diagonal of the array */
 /* >          contain the M-by-f2cmin(M,N) lower trapezoidal matrix L */
 /* >          (L is lower triangular if M <= N); */
 /* >          the elements above the diagonal are used to store part of the */
 /* >          data structure to represent Q. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is DOUBLE PRECISION array, dimension (MAX(5,TSIZE)) */
 /* >          On exit, if INFO = 0, T(1) returns optimal (or either minimal */
 /* >          or optimal, if query is assumed) TSIZE. See TSIZE for details. */
 /* >          Remaining T contains part of the data structure used to represent Q. */
 /* >          If one wants to apply or construct Q, then one needs to keep T */
 /* >          (in addition to A) and pass it to further subroutines. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TSIZE */
 /* > \verbatim */
 /* >          TSIZE is INTEGER */
 /* >          If TSIZE >= 5, the dimension of the array T. */
 /* >          If TSIZE = -1 or -2, then a workspace query is assumed. The routine */
 /* >          only calculates the sizes of the T and WORK arrays, returns these */
 /* >          values as the first entries of the T and WORK arrays, and no error */
 /* >          message related to T or WORK is issued by XERBLA. */
 /* >          If TSIZE = -1, the routine calculates optimal size of T for the */
 /* >          optimum performance and returns this value in T(1). */
 /* >          If TSIZE = -2, the routine calculates minimal size of T and */
 /* >          returns this value in T(1). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */
 /* >          or optimal, if query was assumed) LWORK. */
 /* >          See LWORK for details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If LWORK = -1 or -2, then a workspace query is assumed. The routine */
 /* >          only calculates the sizes of the T and WORK arrays, returns these */
 /* >          values as the first entries of the T and WORK arrays, and no error */
 /* >          message related to T or WORK is issued by XERBLA. */
 /* >          If LWORK = -1, the routine calculates optimal size of WORK for the */
 /* >          optimal performance and returns this value in WORK(1). */
 /* >          If LWORK = -2, the routine calculates minimal size of WORK and */
 /* >          returns this value in WORK(1). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \par Further Details */
 /*  ==================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* > The goal of the interface is to give maximum freedom to the developers for */
 /* > creating any LQ factorization algorithm they wish. The triangular */
 /* > (trapezoidal) L has to be stored in the lower part of A. The lower part of A */
 /* > and the array T can be used to store any relevant information for applying or */
 /* > constructing the Q factor. The WORK array can safely be discarded after exit. */
 /* > */
 /* > Caution: One should not expect the sizes of T and WORK to be the same from one */
 /* > LAPACK implementation to the other, or even from one execution to the other. */
 /* > A workspace query (for T and WORK) is needed at each execution. However, */
 /* > for a given execution, the size of T and WORK are fixed and will not change */
 /* > from one query to the next. */
 /* > */
 /* > \endverbatim */
 /* > */
 /* > \par Further Details particular to this LAPACK implementation: */
 /*  ============================================================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* > These details are particular for this LAPACK implementation. Users should not */
 /* > take them for granted. These details may change in the future, and are not likely */
 /* > true for another LAPACK implementation. These details are relevant if one wants */
 /* > to try to understand the code. They are not part of the interface. */
 /* > */
 /* > In this version, */
 /* > */
 /* >          T(2): row block size (MB) */
 /* >          T(3): column block size (NB) */
 /* >          T(6:TSIZE): data structure needed for Q, computed by */
 /* >                           DLASWLQ or DGELQT */
 /* > */
 /* >  Depending on the matrix dimensions M and N, and row and column */
 /* >  block sizes MB and NB returned by ILAENV, DGELQ will use either */
 /* >  DLASWLQ (if the matrix is short-and-wide) or DGELQT to compute */
 /* >  the LQ factorization. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgelq_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *t, integer *tsize, doublereal *work, integer *lwork, 
 	integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    /* Local variables */
    logical mint, minw;
    integer lwmin, lwreq, lwopt, mb, nb, nblcks;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int dgelqt_(integer *, integer *, integer *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *);
    logical lminws, lquery;
    integer mintsz;
    extern /* Subroutine */ int dlaswlq_(integer *, integer *, integer *, 
 	    integer *, doublereal *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *, integer *);
 /*  -- LAPACK computational routine (version 3.9.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
 /*     November 2019 */
 /*  ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --t;
    --work;
    /* Function Body */
    *info = 0;
    lquery = *tsize == -1 || *tsize == -2 || *lwork == -1 || *lwork == -2;
    mint = FALSE_;
    minw = FALSE_;
    if (*tsize == -2 || *lwork == -2) {
 	if (*tsize != -1) {
 	    mint = TRUE_;
 	}
 	if (*lwork != -1) {
 	    minw = TRUE_;
 	}
    }
 /*     Determine the block size */
    if (f2cmin(*m,*n) > 0) {
 	mb = ilaenv_(&c__1, "DGELQ ", " ", m, n, &c__1, &c_n1, (ftnlen)6, (
 		ftnlen)1);
 	nb = ilaenv_(&c__1, "DGELQ ", " ", m, n, &c__2, &c_n1, (ftnlen)6, (
 		ftnlen)1);
    } else {
 	mb = 1;
 	nb = *n;
    }
    if (mb > f2cmin(*m,*n) || mb < 1) {
 	mb = 1;
    }
    if (nb > *n || nb <= *m) {
 	nb = *n;
    }
    mintsz = *m + 5;
    if (nb > *m && *n > *m) {
 	if ((*n - *m) % (nb - *m) == 0) {
 	    nblcks = (*n - *m) / (nb - *m);
 	} else {
 	    nblcks = (*n - *m) / (nb - *m) + 1;
 	}
    } else {
 	nblcks = 1;
    }
 /*     Determine if the workspace size satisfies minimal size */
    if (*n <= *m || nb <= *m || nb >= *n) {
 	lwmin = f2cmax(1,*n);
 /* Computing MAX */
 	i__1 = 1, i__2 = mb * *n;
 	lwopt = f2cmax(i__1,i__2);
    } else {
 	lwmin = f2cmax(1,*m);
 /* Computing MAX */
 	i__1 = 1, i__2 = mb * *m;
 	lwopt = f2cmax(i__1,i__2);
    }
    lminws = FALSE_;
 /* Computing MAX */
    i__1 = 1, i__2 = mb * *m * nblcks + 5;
    if ((*tsize < f2cmax(i__1,i__2) || *lwork < lwopt) && *lwork >= lwmin && *
 	    tsize >= mintsz && ! lquery) {
 /* Computing MAX */
 	i__1 = 1, i__2 = mb * *m * nblcks + 5;
 	if (*tsize < f2cmax(i__1,i__2)) {
 	    lminws = TRUE_;
 	    mb = 1;
 	    nb = *n;
 	}
 	if (*lwork < lwopt) {
 	    lminws = TRUE_;
 	    mb = 1;
 	}
    }
    if (*n <= *m || nb <= *m || nb >= *n) {
 /* Computing MAX */
 	i__1 = 1, i__2 = mb * *n;
 	lwreq = f2cmax(i__1,i__2);
    } else {
 /* Computing MAX */
 	i__1 = 1, i__2 = mb * *m;
 	lwreq = f2cmax(i__1,i__2);
    }
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = 1, i__2 = mb * *m * nblcks + 5;
 	if (*tsize < f2cmax(i__1,i__2) && ! lquery && ! lminws) {
 	    *info = -6;
 	} else if (*lwork < lwreq && ! lquery && ! lminws) {
 	    *info = -8;
 	}
    }
    if (*info == 0) {
 	if (mint) {
 	    t[1] = (doublereal) mintsz;
 	} else {
 	    t[1] = (doublereal) (mb * *m * nblcks + 5);
 	}
 	t[2] = (doublereal) mb;
 	t[3] = (doublereal) nb;
 	if (minw) {
 	    work[1] = (doublereal) lwmin;
 	} else {
 	    work[1] = (doublereal) lwreq;
 	}
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGELQ", &i__1, (ftnlen)5);
 	return 0;
    } else if (lquery) {
 	return 0;
    }
 /*     Quick return if possible */
    if (f2cmin(*m,*n) == 0) {
 	return 0;
    }
 /*     The LQ Decomposition */
    if (*n <= *m || nb <= *m || nb >= *n) {
 	dgelqt_(m, n, &mb, &a[a_offset], lda, &t[6], &mb, &work[1], info);
    } else {
 	dlaswlq_(m, n, &mb, &nb, &a[a_offset], lda, &t[6], &mb, &work[1], 
 		lwork, info);
    }
    work[1] = (doublereal) lwreq;
    return 0;
 /*     End of DGELQ */
 } /* dgelq_ */
--- a/lapack-netlib/SRC/dgelq2.c
+++ b/lapack-netlib/SRC/dgelq2.c
@@ -0,0 +1,597 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b DGELQ2 computes the LQ factorization of a general rectangular matrix using an unblocked algorit
 hm. */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGELQ2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelq2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelq2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelq2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO ) */
 /*       INTEGER            INFO, LDA, M, N */
 /*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGELQ2 computes an LQ factorization of a real m-by-n matrix A: */
 /* > */
 /* >    A = ( L 0 ) *  Q */
 /* > */
 /* > where: */
 /* > */
 /* >    Q is a n-by-n orthogonal matrix; */
 /* >    L is an lower-triangular m-by-m matrix; */
 /* >    0 is a m-by-(n-m) zero matrix, if m < n. */
 /* > */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the m by n matrix A. */
 /* >          On exit, the elements on and below the diagonal of the array */
 /* >          contain the m by f2cmin(m,n) lower trapezoidal matrix L (L is */
 /* >          lower triangular if m <= n); the elements above the diagonal, */
 /* >          with the array TAU, represent the orthogonal matrix Q as a */
 /* >          product of elementary reflectors (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (M) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date November 2019 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */
 /* >  and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgelq2_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *tau, doublereal *work, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    /* Local variables */
    integer i__, k;
    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
 	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
 	    doublereal *), dlarfg_(integer *, doublereal *, 
 	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen);
    doublereal aii;
 /*  -- LAPACK computational routine (version 3.9.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2019 */
 /*  ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGELQ2", &i__1, (ftnlen)6);
 	return 0;
    }
    k = f2cmin(*m,*n);
    i__1 = k;
    for (i__ = 1; i__ <= i__1; ++i__) {
 /*        Generate elementary reflector H(i) to annihilate A(i,i+1:n) */
 	i__2 = *n - i__ + 1;
 /* Computing MIN */
 	i__3 = i__ + 1;
 	dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + f2cmin(i__3,*n) * a_dim1]
 		, lda, &tau[i__]);
 	if (i__ < *m) {
 /*           Apply H(i) to A(i+1:m,i:n) from the right */
 	    aii = a[i__ + i__ * a_dim1];
 	    a[i__ + i__ * a_dim1] = 1.;
 	    i__2 = *m - i__;
 	    i__3 = *n - i__ + 1;
 	    dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[
 		    i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]);
 	    a[i__ + i__ * a_dim1] = aii;
 	}
 /* L10: */
    }
    return 0;
 /*     End of DGELQ2 */
 } /* dgelq2_ */
--- a/lapack-netlib/SRC/dgelqf.c
+++ b/lapack-netlib/SRC/dgelqf.c
@@ -0,0 +1,700 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;
 /* > \brief \b DGELQF */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGELQF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelqf.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelqf.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelqf.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */
 /*       INTEGER            INFO, LDA, LWORK, M, N */
 /*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGELQF computes an LQ factorization of a real M-by-N matrix A: */
 /* > */
 /* >    A = ( L 0 ) *  Q */
 /* > */
 /* > where: */
 /* > */
 /* >    Q is a N-by-N orthogonal matrix; */
 /* >    L is an lower-triangular M-by-M matrix; */
 /* >    0 is a M-by-(N-M) zero matrix, if M < N. */
 /* > */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and below the diagonal of the array */
 /* >          contain the m-by-f2cmin(m,n) lower trapezoidal matrix L (L is */
 /* >          lower triangular if m <= n); the elements above the diagonal, */
 /* >          with the array TAU, represent the orthogonal matrix Q as a */
 /* >          product of elementary reflectors (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK.  LWORK >= f2cmax(1,M). */
 /* >          For optimum performance LWORK >= M*NB, where NB is the */
 /* >          optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date November 2019 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */
 /* >  and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgelqf_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *tau, doublereal *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    integer i__, k, nbmin, iinfo;
    extern /* Subroutine */ int dgelq2_(integer *, integer *, doublereal *, 
 	    integer *, doublereal *, doublereal *, integer *);
    integer ib, nb;
    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *);
    integer nx;
    extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, 
 	    doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer ldwork, lwkopt;
    logical lquery;
    integer iws;
 /*  -- LAPACK computational routine (version 3.9.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2019 */
 /*  ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;
    /* Function Body */
    *info = 0;
    nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
 	    1);
    lwkopt = *m * nb;
    work[1] = (doublereal) lwkopt;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    } else if (*lwork < f2cmax(1,*m) && ! lquery) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGELQF", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }
 /*     Quick return if possible */
    k = f2cmin(*m,*n);
    if (k == 0) {
 	work[1] = 1.;
 	return 0;
    }
    nbmin = 2;
    nx = 0;
    iws = *m;
    if (nb > 1 && nb < k) {
 /*        Determine when to cross over from blocked to unblocked code. */
 /* Computing MAX */
 	i__1 = 0, i__2 = ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);
 	if (nx < k) {
 /*           Determine if workspace is large enough for blocked code. */
 	    ldwork = *m;
 	    iws = ldwork * nb;
 	    if (*lwork < iws) {
 /*              Not enough workspace to use optimal NB:  reduce NB and */
 /*              determine the minimum value of NB. */
 		nb = *lwork / ldwork;
 /* Computing MAX */
 		i__1 = 2, i__2 = ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, &
 			c_n1, (ftnlen)6, (ftnlen)1);
 		nbmin = f2cmax(i__1,i__2);
 	    }
 	}
    }
    if (nb >= nbmin && nb < k && nx < k) {
 /*        Use blocked code initially */
 	i__1 = k - nx;
 	i__2 = nb;
 	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
 /* Computing MIN */
 	    i__3 = k - i__ + 1;
 	    ib = f2cmin(i__3,nb);
 /*           Compute the LQ factorization of the current block */
 /*           A(i:i+ib-1,i:n) */
 	    i__3 = *n - i__ + 1;
 	    dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
 		    1], &iinfo);
 	    if (i__ + ib <= *m) {
 /*              Form the triangular factor of the block reflector */
 /*              H = H(i) H(i+1) . . . H(i+ib-1) */
 		i__3 = *n - i__ + 1;
 		dlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * 
 			a_dim1], lda, &tau[i__], &work[1], &ldwork);
 /*              Apply H to A(i+ib:m,i:n) from the right */
 		i__3 = *m - i__ - ib + 1;
 		i__4 = *n - i__ + 1;
 		dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, 
 			&i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
 			ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + 
 			1], &ldwork);
 	    }
 /* L10: */
 	}
    } else {
 	i__ = 1;
    }
 /*     Use unblocked code to factor the last or only block. */
    if (i__ <= k) {
 	i__2 = *m - i__ + 1;
 	i__1 = *n - i__ + 1;
 	dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
 		, &iinfo);
    }
    work[1] = (doublereal) iws;
    return 0;
 /*     End of DGELQF */
 } /* dgelqf_ */
--- a/lapack-netlib/SRC/dgelqt.c
+++ b/lapack-netlib/SRC/dgelqt.c
@@ -0,0 +1,621 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b DGELQT */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEQRT + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelqt.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelqt.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelqt.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) */
 /*       INTEGER INFO, LDA, LDT, M, N, MB */
 /*       DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGELQT computes a blocked LQ factorization of a real M-by-N matrix A */
 /* > using the compact WY representation of Q. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] MB */
 /* > \verbatim */
 /* >          MB is INTEGER */
 /* >          The block size to be used in the blocked QR.  MIN(M,N) >= MB >= 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and below the diagonal of the array */
 /* >          contain the M-by-MIN(M,N) lower trapezoidal matrix L (L is */
 /* >          lower triangular if M <= N); the elements above the diagonal */
 /* >          are the rows of V. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is DOUBLE PRECISION array, dimension (LDT,MIN(M,N)) */
 /* >          The upper triangular block reflectors stored in compact form */
 /* >          as a sequence of upper triangular blocks.  See below */
 /* >          for further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= MB. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (MB*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date November 2017 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix V stores the elementary reflectors H(i) in the i-th row */
 /* >  above the diagonal. For example, if M=5 and N=3, the matrix V is */
 /* > */
 /* >               V = (  1  v1 v1 v1 v1 ) */
 /* >                   (     1  v2 v2 v2 ) */
 /* >                   (         1 v3 v3 ) */
 /* > */
 /* > */
 /* >  where the vi's represent the vectors which define H(i), which are returned */
 /* >  in the matrix A.  The 1's along the diagonal of V are not stored in A. */
 /* >  Let K=MIN(M,N).  The number of blocks is B = ceiling(K/MB), where each */
 /* >  block is of order MB except for the last block, which is of order */
 /* >  IB = K - (B-1)*MB.  For each of the B blocks, a upper triangular block */
 /* >  reflector factor is computed: T1, T2, ..., TB.  The MB-by-MB (and IB-by-IB */
 /* >  for the last block) T's are stored in the MB-by-K matrix T as */
 /* > */
 /* >               T = (T1 T2 ... TB). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgelqt_(integer *m, integer *n, integer *mb, doublereal *
 	a, integer *lda, doublereal *t, integer *ldt, doublereal *work, 
 	integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3, i__4, i__5;
    /* Local variables */
    integer i__, k, iinfo, ib;
    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *), xerbla_(char *, 
 	    integer *, ftnlen), dgelqt3_(integer *, integer *, doublereal *, 
 	    integer *, doublereal *, integer *, integer *);
 /*  -- LAPACK computational routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */
 /* ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    --work;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*mb < 1 || *mb > f2cmin(*m,*n) && f2cmin(*m,*n) > 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -5;
    } else if (*ldt < *mb) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGELQT", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Quick return if possible */
    k = f2cmin(*m,*n);
    if (k == 0) {
 	return 0;
    }
 /*     Blocked loop of length K */
    i__1 = k;
    i__2 = *mb;
    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
 /* Computing MIN */
 	i__3 = k - i__ + 1;
 	ib = f2cmin(i__3,*mb);
 /*     Compute the LQ factorization of the current block A(I:M,I:I+IB-1) */
 	i__3 = *n - i__ + 1;
 	dgelqt3_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &t[i__ * t_dim1 + 1]
 		, ldt, &iinfo);
 	if (i__ + ib <= *m) {
 /*     Update by applying H**T to A(I:M,I+IB:N) from the right */
 	    i__3 = *m - i__ - ib + 1;
 	    i__4 = *n - i__ + 1;
 	    i__5 = *m - i__ - ib + 1;
 	    dlarfb_("R", "N", "F", "R", &i__3, &i__4, &ib, &a[i__ + i__ * 
 		    a_dim1], lda, &t[i__ * t_dim1 + 1], ldt, &a[i__ + ib + 
 		    i__ * a_dim1], lda, &work[1], &i__5);
 	}
    }
    return 0;
 /*     End of DGELQT */
 } /* dgelqt_ */
--- a/lapack-netlib/SRC/dgelqt3.c
+++ b/lapack-netlib/SRC/dgelqt3.c
@@ -0,0 +1,679 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static doublereal c_b7 = 1.;
 static doublereal c_b19 = -1.;
 /* > \brief \b DGELQT3 recursively computes a LQ factorization of a general real or complex matrix using the c
 ompact WY representation of Q. */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEQRT3 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelqt3
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelqt3
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelqt3
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*        SUBROUTINE DGELQT3( M, N, A, LDA, T, LDT, INFO ) */
 /*       INTEGER   INFO, LDA, M, N, LDT */
 /*       DOUBLE PRECISION   A( LDA, * ), T( LDT, * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGELQT3 recursively computes a LQ factorization of a real M-by-N */
 /* > matrix A, using the compact WY representation of Q. */
 /* > */
 /* > Based on the algorithm of Elmroth and Gustavson, */
 /* > IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M =< N. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the real M-by-N matrix A.  On exit, the elements on and */
 /* >          below the diagonal contain the N-by-N lower triangular matrix L; the */
 /* >          elements above the diagonal are the rows of V.  See below for */
 /* >          further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is DOUBLE PRECISION array, dimension (LDT,N) */
 /* >          The N-by-N upper triangular factor of the block reflector. */
 /* >          The elements on and above the diagonal contain the block */
 /* >          reflector T; the elements below the diagonal are not used. */
 /* >          See below for further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date November 2017 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix V stores the elementary reflectors H(i) in the i-th row */
 /* >  above the diagonal. For example, if M=5 and N=3, the matrix V is */
 /* > */
 /* >               V = (  1  v1 v1 v1 v1 ) */
 /* >                   (     1  v2 v2 v2 ) */
 /* >                   (     1  v3 v3 v3 ) */
 /* > */
 /* > */
 /* >  where the vi's represent the vectors which define H(i), which are returned */
 /* >  in the matrix A.  The 1's along the diagonal of V are not stored in A.  The */
 /* >  block reflector H is then given by */
 /* > */
 /* >               H = I - V * T * V**T */
 /* > */
 /* >  where V**T is the transpose of V. */
 /* > */
 /* >  For details of the algorithm, see Elmroth and Gustavson (cited above). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgelqt3_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *t, integer *ldt, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2;
    /* Local variables */
    integer i__, j;
    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
 	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
 	    integer *, doublereal *, doublereal *, integer *);
    integer iinfo;
    extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, 
 	    integer *, integer *, doublereal *, doublereal *, integer *, 
 	    doublereal *, integer *);
    integer i1, j1, m1, m2;
    extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *,
 	     integer *, doublereal *), xerbla_(char *, integer *, ftnlen);
 /*  -- LAPACK computational routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */
 /*  ===================================================================== */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < *m) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    } else if (*ldt < f2cmax(1,*m)) {
 	*info = -6;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGELQT3", &i__1, (ftnlen)7);
 	return 0;
    }
    if (*m == 1) {
 /*        Compute Householder transform when N=1 */
 	dlarfg_(n, &a[a_offset], &a[f2cmin(2,*n) * a_dim1 + 1], lda, &t[t_offset]
 		);
    } else {
 /*        Otherwise, split A into blocks... */
 	m1 = *m / 2;
 	m2 = *m - m1;
 /* Computing MIN */
 	i__1 = m1 + 1;
 	i1 = f2cmin(i__1,*m);
 /* Computing MIN */
 	i__1 = *m + 1;
 	j1 = f2cmin(i__1,*n);
 /*        Compute A(1:M1,1:N) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H */
 	dgelqt3_(&m1, n, &a[a_offset], lda, &t[t_offset], ldt, &iinfo);
 /*        Compute A(J1:M,1:N) = Q1^H A(J1:M,1:N) [workspace: T(1:N1,J1:N)] */
 	i__1 = m2;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = m1;
 	    for (j = 1; j <= i__2; ++j) {
 		t[i__ + m1 + j * t_dim1] = a[i__ + m1 + j * a_dim1];
 	    }
 	}
 	dtrmm_("R", "U", "T", "U", &m2, &m1, &c_b7, &a[a_offset], lda, &t[i1 
 		+ t_dim1], ldt);
 	i__1 = *n - m1;
 	dgemm_("N", "T", &m2, &m1, &i__1, &c_b7, &a[i1 + i1 * a_dim1], lda, &
 		a[i1 * a_dim1 + 1], lda, &c_b7, &t[i1 + t_dim1], ldt);
 	dtrmm_("R", "U", "N", "N", &m2, &m1, &c_b7, &t[t_offset], ldt, &t[i1 
 		+ t_dim1], ldt);
 	i__1 = *n - m1;
 	dgemm_("N", "N", &m2, &i__1, &m1, &c_b19, &t[i1 + t_dim1], ldt, &a[i1 
 		* a_dim1 + 1], lda, &c_b7, &a[i1 + i1 * a_dim1], lda);
 	dtrmm_("R", "U", "N", "U", &m2, &m1, &c_b7, &a[a_offset], lda, &t[i1 
 		+ t_dim1], ldt);
 	i__1 = m2;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = m1;
 	    for (j = 1; j <= i__2; ++j) {
 		a[i__ + m1 + j * a_dim1] -= t[i__ + m1 + j * t_dim1];
 		t[i__ + m1 + j * t_dim1] = 0.;
 	    }
 	}
 /*        Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H */
 	i__1 = *n - m1;
 	dgelqt3_(&m2, &i__1, &a[i1 + i1 * a_dim1], lda, &t[i1 + i1 * t_dim1], 
 		ldt, &iinfo);
 /*        Compute T3 = T(J1:N1,1:N) = -T1 Y1^H Y2 T2 */
 	i__1 = m2;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = m1;
 	    for (j = 1; j <= i__2; ++j) {
 		t[j + (i__ + m1) * t_dim1] = a[j + (i__ + m1) * a_dim1];
 	    }
 	}
 	dtrmm_("R", "U", "T", "U", &m1, &m2, &c_b7, &a[i1 + i1 * a_dim1], lda,
 		 &t[i1 * t_dim1 + 1], ldt);
 	i__1 = *n - *m;
 	dgemm_("N", "T", &m1, &m2, &i__1, &c_b7, &a[j1 * a_dim1 + 1], lda, &a[
 		i1 + j1 * a_dim1], lda, &c_b7, &t[i1 * t_dim1 + 1], ldt);
 	dtrmm_("L", "U", "N", "N", &m1, &m2, &c_b19, &t[t_offset], ldt, &t[i1 
 		* t_dim1 + 1], ldt);
 	dtrmm_("R", "U", "N", "N", &m1, &m2, &c_b7, &t[i1 + i1 * t_dim1], ldt,
 		 &t[i1 * t_dim1 + 1], ldt);
 /*        Y = (Y1,Y2); L = [ L1            0  ];  T = [T1 T3] */
 /*                         [ A(1:N1,J1:N)  L2 ]       [ 0 T2] */
    }
    return 0;
 /*     End of DGELQT3 */
 } /* dgelqt3_ */
--- a/lapack-netlib/SRC/dgels.c
+++ b/lapack-netlib/SRC/dgels.c
@@ -0,0 +1,956 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static doublereal c_b33 = 0.;
 static integer c__0 = 0;
 /* > \brief <b> DGELS solves overdetermined or underdetermined systems for GE matrices</b> */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGELS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgels.f
 "> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgels.f
 "> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgels.f
 "> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, */
 /*                         INFO ) */
 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS */
 /*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGELS solves overdetermined or underdetermined real linear systems */
 /* > involving an M-by-N matrix A, or its transpose, using a QR or LQ */
 /* > factorization of A.  It is assumed that A has full rank. */
 /* > */
 /* > The following options are provided: */
 /* > */
 /* > 1. If TRANS = 'N' and m >= n:  find the least squares solution of */
 /* >    an overdetermined system, i.e., solve the least squares problem */
 /* >                 minimize || B - A*X ||. */
 /* > */
 /* > 2. If TRANS = 'N' and m < n:  find the minimum norm solution of */
 /* >    an underdetermined system A * X = B. */
 /* > */
 /* > 3. If TRANS = 'T' and m >= n:  find the minimum norm solution of */
 /* >    an underdetermined system A**T * X = B. */
 /* > */
 /* > 4. If TRANS = 'T' and m < n:  find the least squares solution of */
 /* >    an overdetermined system, i.e., solve the least squares problem */
 /* >                 minimize || B - A**T * X ||. */
 /* > */
 /* > Several right hand side vectors b and solution vectors x can be */
 /* > handled in a single call; they are stored as the columns of the */
 /* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
 /* > matrix X. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          = 'N': the linear system involves A; */
 /* >          = 'T': the linear system involves A**T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of */
 /* >          columns of the matrices B and X. NRHS >=0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, */
 /* >            if M >= N, A is overwritten by details of its QR */
 /* >                       factorization as returned by DGEQRF; */
 /* >            if M <  N, A is overwritten by details of its LQ */
 /* >                       factorization as returned by DGELQF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
 /* >          On entry, the matrix B of right hand side vectors, stored */
 /* >          columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */
 /* >          if TRANS = 'T'. */
 /* >          On exit, if INFO = 0, B is overwritten by the solution */
 /* >          vectors, stored columnwise: */
 /* >          if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */
 /* >          squares solution vectors; the residual sum of squares for the */
 /* >          solution in each column is given by the sum of squares of */
 /* >          elements N+1 to M in that column; */
 /* >          if TRANS = 'N' and m < n, rows 1 to N of B contain the */
 /* >          minimum norm solution vectors; */
 /* >          if TRANS = 'T' and m >= n, rows 1 to M of B contain the */
 /* >          minimum norm solution vectors; */
 /* >          if TRANS = 'T' and m < n, rows 1 to M of B contain the */
 /* >          least squares solution vectors; the residual sum of squares */
 /* >          for the solution in each column is given by the sum of */
 /* >          squares of elements M+1 to N in that column. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= MAX(1,M,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          LWORK >= f2cmax( 1, MN + f2cmax( MN, NRHS ) ). */
 /* >          For optimal performance, */
 /* >          LWORK >= f2cmax( 1, MN + f2cmax( MN, NRHS )*NB ). */
 /* >          where MN = f2cmin(M,N) and NB is the optimum block size. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO =  i, the i-th diagonal element of the */
 /* >                triangular factor of A is zero, so that A does not have */
 /* >                full rank; the least squares solution could not be */
 /* >                computed. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEsolve */
 /*  ===================================================================== */
 /* Subroutine */ int dgels_(char *trans, integer *m, integer *n, integer *
 	nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, 
 	doublereal *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
    /* Local variables */
    doublereal anrm, bnrm;
    integer brow;
    logical tpsd;
    integer i__, j, iascl, ibscl;
    extern logical lsame_(char *, char *);
    integer wsize;
    doublereal rwork[1];
    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
    integer nb;
    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
 	    integer *, doublereal *, integer *, doublereal *);
    integer mn;
    extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, 
 	    integer *, doublereal *, doublereal *, integer *, integer *), 
 	    dlascl_(char *, integer *, integer *, doublereal *, doublereal *, 
 	    integer *, integer *, doublereal *, integer *, integer *),
 	     dgeqrf_(integer *, integer *, doublereal *, integer *, 
 	    doublereal *, doublereal *, integer *, integer *), dlaset_(char *,
 	     integer *, integer *, doublereal *, doublereal *, doublereal *, 
 	    integer *), xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer scllen;
    doublereal bignum;
    extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, 
 	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
 	    integer *, doublereal *, integer *, integer *), 
 	    dormqr_(char *, char *, integer *, integer *, integer *, 
 	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
 	    doublereal *, integer *, integer *);
    doublereal smlnum;
    logical lquery;
    extern /* Subroutine */ int dtrtrs_(char *, char *, char *, integer *, 
 	    integer *, doublereal *, integer *, doublereal *, integer *, 
 	    integer *);
 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input arguments. */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;
    /* Function Body */
    *info = 0;
    mn = f2cmin(*m,*n);
    lquery = *lwork == -1;
    if (! (lsame_(trans, "N") || lsame_(trans, "T"))) {
 	*info = -1;
    } else if (*m < 0) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*nrhs < 0) {
 	*info = -4;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -6;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = f2cmax(1,*m);
 	if (*ldb < f2cmax(i__1,*n)) {
 	    *info = -8;
 	} else /* if(complicated condition) */ {
 /* Computing MAX */
 	    i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs);
 	    if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
 		*info = -10;
 	    }
 	}
    }
 /*     Figure out optimal block size */
    if (*info == 0 || *info == -10) {
 	tpsd = TRUE_;
 	if (lsame_(trans, "N")) {
 	    tpsd = FALSE_;
 	}
 	if (*m >= *n) {
 	    nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
 		    (ftnlen)1);
 	    if (tpsd) {
 /* Computing MAX */
 		i__1 = nb, i__2 = ilaenv_(&c__1, "DORMQR", "LN", m, nrhs, n, &
 			c_n1, (ftnlen)6, (ftnlen)2);
 		nb = f2cmax(i__1,i__2);
 	    } else {
 /* Computing MAX */
 		i__1 = nb, i__2 = ilaenv_(&c__1, "DORMQR", "LT", m, nrhs, n, &
 			c_n1, (ftnlen)6, (ftnlen)2);
 		nb = f2cmax(i__1,i__2);
 	    }
 	} else {
 	    nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
 		    (ftnlen)1);
 	    if (tpsd) {
 /* Computing MAX */
 		i__1 = nb, i__2 = ilaenv_(&c__1, "DORMLQ", "LT", n, nrhs, m, &
 			c_n1, (ftnlen)6, (ftnlen)2);
 		nb = f2cmax(i__1,i__2);
 	    } else {
 /* Computing MAX */
 		i__1 = nb, i__2 = ilaenv_(&c__1, "DORMLQ", "LN", n, nrhs, m, &
 			c_n1, (ftnlen)6, (ftnlen)2);
 		nb = f2cmax(i__1,i__2);
 	    }
 	}
 /* Computing MAX */
 	i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs) * nb;
 	wsize = f2cmax(i__1,i__2);
 	work[1] = (doublereal) wsize;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGELS ", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }
 /*     Quick return if possible */
 /* Computing MIN */
    i__1 = f2cmin(*m,*n);
    if (f2cmin(i__1,*nrhs) == 0) {
 	i__1 = f2cmax(*m,*n);
 	dlaset_("Full", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb);
 	return 0;
    }
 /*     Get machine parameters */
    smlnum = dlamch_("S") / dlamch_("P");
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);
 /*     Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */
    anrm = dlange_("M", m, n, &a[a_offset], lda, rwork);
    iascl = 0;
    if (anrm > 0. && anrm < smlnum) {
 /*        Scale matrix norm up to SMLNUM */
 	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
 		info);
 	iascl = 1;
    } else if (anrm > bignum) {
 /*        Scale matrix norm down to BIGNUM */
 	dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
 		info);
 	iascl = 2;
    } else if (anrm == 0.) {
 /*        Matrix all zero. Return zero solution. */
 	i__1 = f2cmax(*m,*n);
 	dlaset_("F", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb);
 	goto L50;
    }
    brow = *m;
    if (tpsd) {
 	brow = *n;
    }
    bnrm = dlange_("M", &brow, nrhs, &b[b_offset], ldb, rwork);
    ibscl = 0;
    if (bnrm > 0. && bnrm < smlnum) {
 /*        Scale matrix norm up to SMLNUM */
 	dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], 
 		ldb, info);
 	ibscl = 1;
    } else if (bnrm > bignum) {
 /*        Scale matrix norm down to BIGNUM */
 	dlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], 
 		ldb, info);
 	ibscl = 2;
    }
    if (*m >= *n) {
 /*        compute QR factorization of A */
 	i__1 = *lwork - mn;
 	dgeqrf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info)
 		;
 /*        workspace at least N, optimally N*NB */
 	if (! tpsd) {
 /*           Least-Squares Problem f2cmin || A * X - B || */
 /*           B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */
 	    i__1 = *lwork - mn;
 	    dormqr_("Left", "Transpose", m, nrhs, n, &a[a_offset], lda, &work[
 		    1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);
 /*           workspace at least NRHS, optimally NRHS*NB */
 /*           B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */
 	    dtrtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset]
 		    , lda, &b[b_offset], ldb, info);
 	    if (*info > 0) {
 		return 0;
 	    }
 	    scllen = *n;
 	} else {
 /*           Underdetermined system of equations A**T * X = B */
 /*           B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */
 	    dtrtrs_("Upper", "Transpose", "Non-unit", n, nrhs, &a[a_offset], 
 		    lda, &b[b_offset], ldb, info);
 	    if (*info > 0) {
 		return 0;
 	    }
 /*           B(N+1:M,1:NRHS) = ZERO */
 	    i__1 = *nrhs;
 	    for (j = 1; j <= i__1; ++j) {
 		i__2 = *m;
 		for (i__ = *n + 1; i__ <= i__2; ++i__) {
 		    b[i__ + j * b_dim1] = 0.;
 /* L10: */
 		}
 /* L20: */
 	    }
 /*           B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */
 	    i__1 = *lwork - mn;
 	    dormqr_("Left", "No transpose", m, nrhs, n, &a[a_offset], lda, &
 		    work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);
 /*           workspace at least NRHS, optimally NRHS*NB */
 	    scllen = *m;
 	}
    } else {
 /*        Compute LQ factorization of A */
 	i__1 = *lwork - mn;
 	dgelqf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info)
 		;
 /*        workspace at least M, optimally M*NB. */
 	if (! tpsd) {
 /*           underdetermined system of equations A * X = B */
 /*           B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */
 	    dtrtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset]
 		    , lda, &b[b_offset], ldb, info);
 	    if (*info > 0) {
 		return 0;
 	    }
 /*           B(M+1:N,1:NRHS) = 0 */
 	    i__1 = *nrhs;
 	    for (j = 1; j <= i__1; ++j) {
 		i__2 = *n;
 		for (i__ = *m + 1; i__ <= i__2; ++i__) {
 		    b[i__ + j * b_dim1] = 0.;
 /* L30: */
 		}
 /* L40: */
 	    }
 /*           B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) */
 	    i__1 = *lwork - mn;
 	    dormlq_("Left", "Transpose", n, nrhs, m, &a[a_offset], lda, &work[
 		    1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);
 /*           workspace at least NRHS, optimally NRHS*NB */
 	    scllen = *n;
 	} else {
 /*           overdetermined system f2cmin || A**T * X - B || */
 /*           B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */
 	    i__1 = *lwork - mn;
 	    dormlq_("Left", "No transpose", n, nrhs, m, &a[a_offset], lda, &
 		    work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);
 /*           workspace at least NRHS, optimally NRHS*NB */
 /*           B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */
 	    dtrtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], 
 		    lda, &b[b_offset], ldb, info);
 	    if (*info > 0) {
 		return 0;
 	    }
 	    scllen = *m;
 	}
    }
 /*     Undo scaling */
    if (iascl == 1) {
 	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
    } else if (iascl == 2) {
 	dlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
    }
    if (ibscl == 1) {
 	dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
    } else if (ibscl == 2) {
 	dlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
    }
 L50:
    work[1] = (doublereal) wsize;
    return 0;
 /*     End of DGELS */
 } /* dgels_ */
--- a/lapack-netlib/SRC/dgelsd.c
+++ b/lapack-netlib/SRC/dgelsd.c
--- a/lapack-netlib/SRC/dgelss.c
+++ b/lapack-netlib/SRC/dgelss.c
--- a/lapack-netlib/SRC/dgelsy.c
+++ b/lapack-netlib/SRC/dgelsy.c
@@ -0,0 +1,945 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__0 = 0;
 static doublereal c_b31 = 0.;
 static integer c__2 = 2;
 static doublereal c_b54 = 1.;
 /* > \brief <b> DGELSY solves overdetermined or underdetermined systems for GE matrices</b> */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGELSY + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelsy.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelsy.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelsy.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, */
 /*                          WORK, LWORK, INFO ) */
 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK */
 /*       DOUBLE PRECISION   RCOND */
 /*       INTEGER            JPVT( * ) */
 /*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGELSY computes the minimum-norm solution to a real linear least */
 /* > squares problem: */
 /* >     minimize || A * X - B || */
 /* > using a complete orthogonal factorization of A.  A is an M-by-N */
 /* > matrix which may be rank-deficient. */
 /* > */
 /* > Several right hand side vectors b and solution vectors x can be */
 /* > handled in a single call; they are stored as the columns of the */
 /* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
 /* > matrix X. */
 /* > */
 /* > The routine first computes a QR factorization with column pivoting: */
 /* >     A * P = Q * [ R11 R12 ] */
 /* >                 [  0  R22 ] */
 /* > with R11 defined as the largest leading submatrix whose estimated */
 /* > condition number is less than 1/RCOND.  The order of R11, RANK, */
 /* > is the effective rank of A. */
 /* > */
 /* > Then, R22 is considered to be negligible, and R12 is annihilated */
 /* > by orthogonal transformations from the right, arriving at the */
 /* > complete orthogonal factorization: */
 /* >    A * P = Q * [ T11 0 ] * Z */
 /* >                [  0  0 ] */
 /* > The minimum-norm solution is then */
 /* >    X = P * Z**T [ inv(T11)*Q1**T*B ] */
 /* >                 [        0         ] */
 /* > where Q1 consists of the first RANK columns of Q. */
 /* > */
 /* > This routine is basically identical to the original xGELSX except */
 /* > three differences: */
 /* >   o The call to the subroutine xGEQPF has been substituted by the */
 /* >     the call to the subroutine xGEQP3. This subroutine is a Blas-3 */
 /* >     version of the QR factorization with column pivoting. */
 /* >   o Matrix B (the right hand side) is updated with Blas-3. */
 /* >   o The permutation of matrix B (the right hand side) is faster and */
 /* >     more simple. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of */
 /* >          columns of matrices B and X. NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, A has been overwritten by details of its */
 /* >          complete orthogonal factorization. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
 /* >          On entry, the M-by-NRHS right hand side matrix B. */
 /* >          On exit, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= f2cmax(1,M,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] JPVT */
 /* > \verbatim */
 /* >          JPVT is INTEGER array, dimension (N) */
 /* >          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted */
 /* >          to the front of AP, otherwise column i is a free column. */
 /* >          On exit, if JPVT(i) = k, then the i-th column of AP */
 /* >          was the k-th column of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] RCOND */
 /* > \verbatim */
 /* >          RCOND is DOUBLE PRECISION */
 /* >          RCOND is used to determine the effective rank of A, which */
 /* >          is defined as the order of the largest leading triangular */
 /* >          submatrix R11 in the QR factorization with pivoting of A, */
 /* >          whose estimated condition number < 1/RCOND. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RANK */
 /* > \verbatim */
 /* >          RANK is INTEGER */
 /* >          The effective rank of A, i.e., the order of the submatrix */
 /* >          R11.  This is the same as the order of the submatrix T11 */
 /* >          in the complete orthogonal factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          The unblocked strategy requires that: */
 /* >             LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ), */
 /* >          where MN = f2cmin( M, N ). */
 /* >          The block algorithm requires that: */
 /* >             LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ), */
 /* >          where NB is an upper bound on the blocksize returned */
 /* >          by ILAENV for the routines DGEQP3, DTZRZF, STZRQF, DORMQR, */
 /* >          and DORMRZ. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: If INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEsolve */
 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA \n */
 /* >    E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n */
 /* >    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgelsy_(integer *m, integer *n, integer *nrhs, 
 	doublereal *a, integer *lda, doublereal *b, integer *ldb, integer *
 	jpvt, doublereal *rcond, integer *rank, doublereal *work, integer *
 	lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
    doublereal d__1, d__2;
    /* Local variables */
    doublereal anrm, bnrm, smin, smax;
    integer i__, j, iascl, ibscl;
    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
 	    doublereal *, integer *);
    integer ismin, ismax;
    doublereal c1, c2;
    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, 
 	    integer *, integer *, doublereal *, doublereal *, integer *, 
 	    doublereal *, integer *), dlaic1_(
 	    integer *, integer *, doublereal *, doublereal *, doublereal *, 
 	    doublereal *, doublereal *, doublereal *, doublereal *);
    doublereal wsize, s1, s2;
    extern /* Subroutine */ int dgeqp3_(integer *, integer *, doublereal *, 
 	    integer *, integer *, doublereal *, doublereal *, integer *, 
 	    integer *), dlabad_(doublereal *, doublereal *);
    integer nb;
    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
 	    integer *, doublereal *, integer *, doublereal *);
    integer mn;
    extern /* Subroutine */ int dlascl_(char *, integer *, integer *, 
 	    doublereal *, doublereal *, integer *, integer *, doublereal *, 
 	    integer *, integer *), dlaset_(char *, integer *, integer 
 	    *, doublereal *, doublereal *, doublereal *, integer *), 
 	    xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    doublereal bignum;
    integer lwkmin, nb1, nb2, nb3, nb4;
    extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, 
 	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
 	    integer *, doublereal *, integer *, integer *);
    doublereal sminpr, smaxpr, smlnum;
    extern /* Subroutine */ int dormrz_(char *, char *, integer *, integer *, 
 	    integer *, integer *, doublereal *, integer *, doublereal *, 
 	    doublereal *, integer *, doublereal *, integer *, integer *);
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int dtzrzf_(integer *, integer *, doublereal *, 
 	    integer *, doublereal *, doublereal *, integer *, integer *);
 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --jpvt;
    --work;
    /* Function Body */
    mn = f2cmin(*m,*n);
    ismin = mn + 1;
    ismax = (mn << 1) + 1;
 /*     Test the input arguments. */
    *info = 0;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -5;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = f2cmax(1,*m);
 	if (*ldb < f2cmax(i__1,*n)) {
 	    *info = -7;
 	}
    }
 /*     Figure out optimal block size */
    if (*info == 0) {
 	if (mn == 0 || *nrhs == 0) {
 	    lwkmin = 1;
 	    lwkopt = 1;
 	} else {
 	    nb1 = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6,
 		     (ftnlen)1);
 	    nb2 = ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6,
 		     (ftnlen)1);
 	    nb3 = ilaenv_(&c__1, "DORMQR", " ", m, n, nrhs, &c_n1, (ftnlen)6, 
 		    (ftnlen)1);
 	    nb4 = ilaenv_(&c__1, "DORMRQ", " ", m, n, nrhs, &c_n1, (ftnlen)6, 
 		    (ftnlen)1);
 /* Computing MAX */
 	    i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3);
 	    nb = f2cmax(i__1,nb4);
 /* Computing MAX */
 	    i__1 = mn << 1, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = mn + 
 		    *nrhs;
 	    lwkmin = mn + f2cmax(i__1,i__2);
 /* Computing MAX */
 	    i__1 = lwkmin, i__2 = mn + (*n << 1) + nb * (*n + 1), i__1 = f2cmax(
 		    i__1,i__2), i__2 = (mn << 1) + nb * *nrhs;
 	    lwkopt = f2cmax(i__1,i__2);
 	}
 	work[1] = (doublereal) lwkopt;
 	if (*lwork < lwkmin && ! lquery) {
 	    *info = -12;
 	}
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGELSY", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }
 /*     Quick return if possible */
    if (mn == 0 || *nrhs == 0) {
 	*rank = 0;
 	return 0;
    }
 /*     Get machine parameters */
    smlnum = dlamch_("S") / dlamch_("P");
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);
 /*     Scale A, B if f2cmax entries outside range [SMLNUM,BIGNUM] */
    anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]);
    iascl = 0;
    if (anrm > 0. && anrm < smlnum) {
 /*        Scale matrix norm up to SMLNUM */
 	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
 		info);
 	iascl = 1;
    } else if (anrm > bignum) {
 /*        Scale matrix norm down to BIGNUM */
 	dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
 		info);
 	iascl = 2;
    } else if (anrm == 0.) {
 /*        Matrix all zero. Return zero solution. */
 	i__1 = f2cmax(*m,*n);
 	dlaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb);
 	*rank = 0;
 	goto L70;
    }
    bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]);
    ibscl = 0;
    if (bnrm > 0. && bnrm < smlnum) {
 /*        Scale matrix norm up to SMLNUM */
 	dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb,
 		 info);
 	ibscl = 1;
    } else if (bnrm > bignum) {
 /*        Scale matrix norm down to BIGNUM */
 	dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb,
 		 info);
 	ibscl = 2;
    }
 /*     Compute QR factorization with column pivoting of A: */
 /*        A * P = Q * R */
    i__1 = *lwork - mn;
    dgeqp3_(m, n, &a[a_offset], lda, &jpvt[1], &work[1], &work[mn + 1], &i__1,
 	     info);
    wsize = mn + work[mn + 1];
 /*     workspace: MN+2*N+NB*(N+1). */
 /*     Details of Householder rotations stored in WORK(1:MN). */
 /*     Determine RANK using incremental condition estimation */
    work[ismin] = 1.;
    work[ismax] = 1.;
    smax = (d__1 = a[a_dim1 + 1], abs(d__1));
    smin = smax;
    if ((d__1 = a[a_dim1 + 1], abs(d__1)) == 0.) {
 	*rank = 0;
 	i__1 = f2cmax(*m,*n);
 	dlaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb);
 	goto L70;
    } else {
 	*rank = 1;
    }
 L10:
    if (*rank < mn) {
 	i__ = *rank + 1;
 	dlaic1_(&c__2, rank, &work[ismin], &smin, &a[i__ * a_dim1 + 1], &a[
 		i__ + i__ * a_dim1], &sminpr, &s1, &c1);
 	dlaic1_(&c__1, rank, &work[ismax], &smax, &a[i__ * a_dim1 + 1], &a[
 		i__ + i__ * a_dim1], &smaxpr, &s2, &c2);
 	if (smaxpr * *rcond <= sminpr) {
 	    i__1 = *rank;
 	    for (i__ = 1; i__ <= i__1; ++i__) {
 		work[ismin + i__ - 1] = s1 * work[ismin + i__ - 1];
 		work[ismax + i__ - 1] = s2 * work[ismax + i__ - 1];
 /* L20: */
 	    }
 	    work[ismin + *rank] = c1;
 	    work[ismax + *rank] = c2;
 	    smin = sminpr;
 	    smax = smaxpr;
 	    ++(*rank);
 	    goto L10;
 	}
    }
 /*     workspace: 3*MN. */
 /*     Logically partition R = [ R11 R12 ] */
 /*                             [  0  R22 ] */
 /*     where R11 = R(1:RANK,1:RANK) */
 /*     [R11,R12] = [ T11, 0 ] * Y */
    if (*rank < *n) {
 	i__1 = *lwork - (mn << 1);
 	dtzrzf_(rank, n, &a[a_offset], lda, &work[mn + 1], &work[(mn << 1) + 
 		1], &i__1, info);
    }
 /*     workspace: 2*MN. */
 /*     Details of Householder rotations stored in WORK(MN+1:2*MN) */
 /*     B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */
    i__1 = *lwork - (mn << 1);
    dormqr_("Left", "Transpose", m, nrhs, &mn, &a[a_offset], lda, &work[1], &
 	    b[b_offset], ldb, &work[(mn << 1) + 1], &i__1, info);
 /* Computing MAX */
    d__1 = wsize, d__2 = (mn << 1) + work[(mn << 1) + 1];
    wsize = f2cmax(d__1,d__2);
 /*     workspace: 2*MN+NB*NRHS. */
 /*     B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) */
    dtrsm_("Left", "Upper", "No transpose", "Non-unit", rank, nrhs, &c_b54, &
 	    a[a_offset], lda, &b[b_offset], ldb);
    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
 	i__2 = *n;
 	for (i__ = *rank + 1; i__ <= i__2; ++i__) {
 	    b[i__ + j * b_dim1] = 0.;
 /* L30: */
 	}
 /* L40: */
    }
 /*     B(1:N,1:NRHS) := Y**T * B(1:N,1:NRHS) */
    if (*rank < *n) {
 	i__1 = *n - *rank;
 	i__2 = *lwork - (mn << 1);
 	dormrz_("Left", "Transpose", n, nrhs, rank, &i__1, &a[a_offset], lda, 
 		&work[mn + 1], &b[b_offset], ldb, &work[(mn << 1) + 1], &i__2,
 		 info);
    }
 /*     workspace: 2*MN+NRHS. */
 /*     B(1:N,1:NRHS) := P * B(1:N,1:NRHS) */
    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    work[jpvt[i__]] = b[i__ + j * b_dim1];
 /* L50: */
 	}
 	dcopy_(n, &work[1], &c__1, &b[j * b_dim1 + 1], &c__1);
 /* L60: */
    }
 /*     workspace: N. */
 /*     Undo scaling */
    if (iascl == 1) {
 	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb,
 		 info);
 	dlascl_("U", &c__0, &c__0, &smlnum, &anrm, rank, rank, &a[a_offset], 
 		lda, info);
    } else if (iascl == 2) {
 	dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb,
 		 info);
 	dlascl_("U", &c__0, &c__0, &bignum, &anrm, rank, rank, &a[a_offset], 
 		lda, info);
    }
    if (ibscl == 1) {
 	dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb,
 		 info);
    } else if (ibscl == 2) {
 	dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb,
 		 info);
    }
 L70:
    work[1] = (doublereal) lwkopt;
    return 0;
 /*     End of DGELSY */
 } /* dgelsy_ */
--- a/lapack-netlib/SRC/dgemlq.c
+++ b/lapack-netlib/SRC/dgemlq.c
@@ -0,0 +1,684 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b DGEMLQ */
 /*  Definition: */
 /*  =========== */
 /*      SUBROUTINE DGEMLQ( SIDE, TRANS, M, N, K, A, LDA, T, */
 /*     $                   TSIZE, C, LDC, WORK, LWORK, INFO ) */
 /*      CHARACTER          SIDE, TRANS */
 /*      INTEGER            INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC */
 /*      DOUBLE PRECISION   A( LDA, * ), T( * ), C(LDC, * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* >     DGEMLQ overwrites the general real M-by-N matrix C with */
 /* > */
 /* >                    SIDE = 'L'     SIDE = 'R' */
 /* >    TRANS = 'N':      Q * C          C * Q */
 /* >    TRANS = 'T':      Q**T * C       C * Q**T */
 /* >    where Q is a real orthogonal matrix defined as the product */
 /* >    of blocked elementary reflectors computed by short wide LQ */
 /* >    factorization (DGELQ) */
 /* > */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] SIDE */
 /* > \verbatim */
 /* >          SIDE is CHARACTER*1 */
 /* >          = 'L': apply Q or Q**T from the Left; */
 /* >          = 'R': apply Q or Q**T from the Right. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          = 'N':  No transpose, apply Q; */
 /* >          = 'T':  Transpose, apply Q**T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >=0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix C. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] K */
 /* > \verbatim */
 /* >          K is INTEGER */
 /* >          The number of elementary reflectors whose product defines */
 /* >          the matrix Q. */
 /* >          If SIDE = 'L', M >= K >= 0; */
 /* >          if SIDE = 'R', N >= K >= 0. */
 /* > */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension */
 /* >                               (LDA,M) if SIDE = 'L', */
 /* >                               (LDA,N) if SIDE = 'R' */
 /* >          Part of the data structure to represent Q as returned by DGELQ. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,K). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] T */
 /* > \verbatim */
 /* >          T is DOUBLE PRECISION array, dimension (MAX(5,TSIZE)). */
 /* >          Part of the data structure to represent Q as returned by DGELQ. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TSIZE */
 /* > \verbatim */
 /* >          TSIZE is INTEGER */
 /* >          The dimension of the array T. TSIZE >= 5. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] C */
 /* > \verbatim */
 /* >          C is DOUBLE PRECISION array, dimension (LDC,N) */
 /* >          On entry, the M-by-N matrix C. */
 /* >          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDC */
 /* > \verbatim */
 /* >          LDC is INTEGER */
 /* >          The leading dimension of the array C. LDC >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >         (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If LWORK = -1, then a workspace query is assumed. The routine */
 /* >          only calculates the size of the WORK array, returns this */
 /* >          value as WORK(1), and no error message related to WORK */
 /* >          is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \par Further Details */
 /*  ==================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* > These details are particular for this LAPACK implementation. Users should not */
 /* > take them for granted. These details may change in the future, and are not likely */
 /* > true for another LAPACK implementation. These details are relevant if one wants */
 /* > to try to understand the code. They are not part of the interface. */
 /* > */
 /* > In this version, */
 /* > */
 /* >          T(2): row block size (MB) */
 /* >          T(3): column block size (NB) */
 /* >          T(6:TSIZE): data structure needed for Q, computed by */
 /* >                           DLASWLQ or DGELQT */
 /* > */
 /* >  Depending on the matrix dimensions M and N, and row and column */
 /* >  block sizes MB and NB returned by ILAENV, DGELQ will use either */
 /* >  DLASWLQ (if the matrix is wide-and-short) or DGELQT to compute */
 /* >  the LQ factorization. */
 /* >  This version of DGEMLQ will use either DLAMSWLQ or DGEMLQT to */
 /* >  multiply matrix Q by another matrix. */
 /* >  Further Details in DLAMSWLQ or DGEMLQT. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgemlq_(char *side, char *trans, integer *m, integer *n, 
 	integer *k, doublereal *a, integer *lda, doublereal *t, integer *
 	tsize, doublereal *c__, integer *ldc, doublereal *work, integer *
 	lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, c_dim1, c_offset, i__1;
    /* Local variables */
    logical left, tran;
    extern /* Subroutine */ int dlamswlq_(char *, char *, integer *, integer *
 	    , integer *, integer *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *, integer *);
    extern logical lsame_(char *, char *);
    logical right;
    integer mb, nb, mn, lw, nblcks;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    logical notran, lquery;
    extern /* Subroutine */ int dgemlqt_(char *, char *, integer *, integer *,
 	     integer *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *, doublereal *, integer *, doublereal *, integer *);
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /* ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --t;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    --work;
    /* Function Body */
    lquery = *lwork == -1;
    notran = lsame_(trans, "N");
    tran = lsame_(trans, "T");
    left = lsame_(side, "L");
    right = lsame_(side, "R");
    mb = (integer) t[2];
    nb = (integer) t[3];
    if (left) {
 	lw = *n * mb;
 	mn = *m;
    } else {
 	lw = *m * mb;
 	mn = *n;
    }
    if (nb > *k && mn > *k) {
 	if ((mn - *k) % (nb - *k) == 0) {
 	    nblcks = (mn - *k) / (nb - *k);
 	} else {
 	    nblcks = (mn - *k) / (nb - *k) + 1;
 	}
    } else {
 	nblcks = 1;
    }
    *info = 0;
    if (! left && ! right) {
 	*info = -1;
    } else if (! tran && ! notran) {
 	*info = -2;
    } else if (*m < 0) {
 	*info = -3;
    } else if (*n < 0) {
 	*info = -4;
    } else if (*k < 0 || *k > mn) {
 	*info = -5;
    } else if (*lda < f2cmax(1,*k)) {
 	*info = -7;
    } else if (*tsize < 5) {
 	*info = -9;
    } else if (*ldc < f2cmax(1,*m)) {
 	*info = -11;
    } else if (*lwork < f2cmax(1,lw) && ! lquery) {
 	*info = -13;
    }
    if (*info == 0) {
 	work[1] = (doublereal) lw;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEMLQ", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }
 /*     Quick return if possible */
 /* Computing MIN */
    i__1 = f2cmin(*m,*n);
    if (f2cmin(i__1,*k) == 0) {
 	return 0;
    }
 /* Computing MAX */
    i__1 = f2cmax(*m,*n);
    if (left && *m <= *k || right && *n <= *k || nb <= *k || nb >= f2cmax(i__1,*
 	    k)) {
 	dgemlqt_(side, trans, m, n, k, &mb, &a[a_offset], lda, &t[6], &mb, &
 		c__[c_offset], ldc, &work[1], info);
    } else {
 	dlamswlq_(side, trans, m, n, k, &mb, &nb, &a[a_offset], lda, &t[6], &
 		mb, &c__[c_offset], ldc, &work[1], lwork, info);
    }
    work[1] = (doublereal) lw;
    return 0;
 /*     End of DGEMLQ */
 } /* dgemlq_ */
--- a/lapack-netlib/SRC/dgemlqt.c
+++ b/lapack-netlib/SRC/dgemlqt.c
@@ -0,0 +1,707 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b DGEMLQT */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEMLQT + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgemlqt
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgemlqt
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgemlqt
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, */
 /*                          C, LDC, WORK, INFO ) */
 /*       CHARACTER SIDE, TRANS */
 /*       INTEGER   INFO, K, LDV, LDC, M, N, MB, LDT */
 /*       DOUBLE PRECISION V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEMLQT overwrites the general real M-by-N matrix C with */
 /* > */
 /* >                 SIDE = 'L'     SIDE = 'R' */
 /* > TRANS = 'N':      Q C            C Q */
 /* > TRANS = 'T':   Q**T C            C Q**T */
 /* > */
 /* > where Q is a real orthogonal matrix defined as the product of K */
 /* > elementary reflectors: */
 /* > */
 /* >       Q = H(1) H(2) . . . H(K) = I - V T V**T */
 /* > */
 /* > generated using the compact WY representation as returned by DGELQT. */
 /* > */
 /* > Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] SIDE */
 /* > \verbatim */
 /* >          SIDE is CHARACTER*1 */
 /* >          = 'L': apply Q or Q**T from the Left; */
 /* >          = 'R': apply Q or Q**T from the Right. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          = 'N':  No transpose, apply Q; */
 /* >          = 'C':  Transpose, apply Q**T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix C. M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix C. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] K */
 /* > \verbatim */
 /* >          K is INTEGER */
 /* >          The number of elementary reflectors whose product defines */
 /* >          the matrix Q. */
 /* >          If SIDE = 'L', M >= K >= 0; */
 /* >          if SIDE = 'R', N >= K >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] MB */
 /* > \verbatim */
 /* >          MB is INTEGER */
 /* >          The block size used for the storage of T.  K >= MB >= 1. */
 /* >          This must be the same value of MB used to generate T */
 /* >          in DGELQT. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] V */
 /* > \verbatim */
 /* >          V is DOUBLE PRECISION array, dimension */
 /* >                               (LDV,M) if SIDE = 'L', */
 /* >                               (LDV,N) if SIDE = 'R' */
 /* >          The i-th row must contain the vector which defines the */
 /* >          elementary reflector H(i), for i = 1,2,...,k, as returned by */
 /* >          DGELQT in the first K rows of its array argument A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDV */
 /* > \verbatim */
 /* >          LDV is INTEGER */
 /* >          The leading dimension of the array V. LDV >= f2cmax(1,K). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] T */
 /* > \verbatim */
 /* >          T is DOUBLE PRECISION array, dimension (LDT,K) */
 /* >          The upper triangular factors of the block reflectors */
 /* >          as returned by DGELQT, stored as a MB-by-K matrix. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= MB. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] C */
 /* > \verbatim */
 /* >          C is DOUBLE PRECISION array, dimension (LDC,N) */
 /* >          On entry, the M-by-N matrix C. */
 /* >          On exit, C is overwritten by Q C, Q**T C, C Q**T or C Q. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDC */
 /* > \verbatim */
 /* >          LDC is INTEGER */
 /* >          The leading dimension of the array C. LDC >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array. The dimension of */
 /* >          WORK is N*MB if SIDE = 'L', or  M*MB if SIDE = 'R'. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date November 2017 */
 /* > \ingroup doubleGEcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgemlqt_(char *side, char *trans, integer *m, integer *n,
 	 integer *k, integer *mb, doublereal *v, integer *ldv, doublereal *t, 
 	integer *ldt, doublereal *c__, integer *ldc, doublereal *work, 
 	integer *info)
 {
    /* System generated locals */
    integer v_dim1, v_offset, c_dim1, c_offset, t_dim1, t_offset, i__1, i__2, 
 	    i__3, i__4;
    /* Local variables */
    logical left, tran;
    integer i__;
    extern logical lsame_(char *, char *);
    logical right;
    integer ib, kf;
    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *), xerbla_(char *, 
 	    integer *, ftnlen);
    logical notran;
    integer ldwork;
 /*  -- LAPACK computational routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */
 /*  ===================================================================== */
    /* Parameter adjustments */
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1 * 1;
    v -= v_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    --work;
    /* Function Body */
    *info = 0;
    left = lsame_(side, "L");
    right = lsame_(side, "R");
    tran = lsame_(trans, "T");
    notran = lsame_(trans, "N");
    if (left) {
 	ldwork = f2cmax(1,*n);
    } else if (right) {
 	ldwork = f2cmax(1,*m);
    }
    if (! left && ! right) {
 	*info = -1;
    } else if (! tran && ! notran) {
 	*info = -2;
    } else if (*m < 0) {
 	*info = -3;
    } else if (*n < 0) {
 	*info = -4;
    } else if (*k < 0) {
 	*info = -5;
    } else if (*mb < 1 || *mb > *k && *k > 0) {
 	*info = -6;
    } else if (*ldv < f2cmax(1,*k)) {
 	*info = -8;
    } else if (*ldt < *mb) {
 	*info = -10;
    } else if (*ldc < f2cmax(1,*m)) {
 	*info = -12;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEMLQT", &i__1, (ftnlen)7);
 	return 0;
    }
    if (*m == 0 || *n == 0 || *k == 0) {
 	return 0;
    }
    if (left && notran) {
 	i__1 = *k;
 	i__2 = *mb;
 	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
 /* Computing MIN */
 	    i__3 = *mb, i__4 = *k - i__ + 1;
 	    ib = f2cmin(i__3,i__4);
 	    i__3 = *m - i__ + 1;
 	    dlarfb_("L", "T", "F", "R", &i__3, n, &ib, &v[i__ + i__ * v_dim1],
 		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, 
 		    &work[1], &ldwork);
 	}
    } else if (right && tran) {
 	i__2 = *k;
 	i__1 = *mb;
 	for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
 /* Computing MIN */
 	    i__3 = *mb, i__4 = *k - i__ + 1;
 	    ib = f2cmin(i__3,i__4);
 	    i__3 = *n - i__ + 1;
 	    dlarfb_("R", "N", "F", "R", m, &i__3, &ib, &v[i__ + i__ * v_dim1],
 		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], 
 		    ldc, &work[1], &ldwork);
 	}
    } else if (left && tran) {
 	kf = (*k - 1) / *mb * *mb + 1;
 	i__1 = -(*mb);
 	for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
 /* Computing MIN */
 	    i__2 = *mb, i__3 = *k - i__ + 1;
 	    ib = f2cmin(i__2,i__3);
 	    i__2 = *m - i__ + 1;
 	    dlarfb_("L", "N", "F", "R", &i__2, n, &ib, &v[i__ + i__ * v_dim1],
 		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, 
 		    &work[1], &ldwork);
 	}
    } else if (right && notran) {
 	kf = (*k - 1) / *mb * *mb + 1;
 	i__1 = -(*mb);
 	for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
 /* Computing MIN */
 	    i__2 = *mb, i__3 = *k - i__ + 1;
 	    ib = f2cmin(i__2,i__3);
 	    i__2 = *n - i__ + 1;
 	    dlarfb_("R", "T", "F", "R", m, &i__2, &ib, &v[i__ + i__ * v_dim1],
 		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], 
 		    ldc, &work[1], &ldwork);
 	}
    }
    return 0;
 /*     End of DGEMLQT */
 } /* dgemlqt_ */
--- a/lapack-netlib/SRC/dgemqr.c
+++ b/lapack-netlib/SRC/dgemqr.c
@@ -0,0 +1,685 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b DGEMQR */
 /*  Definition: */
 /*  =========== */
 /*      SUBROUTINE DGEMQR( SIDE, TRANS, M, N, K, A, LDA, T, */
 /*     $                   TSIZE, C, LDC, WORK, LWORK, INFO ) */
 /*     CHARACTER         SIDE, TRANS */
 /*     INTEGER           INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC */
 /*     DOUBLE PRECISION  A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEMQR overwrites the general real M-by-N matrix C with */
 /* > */
 /* >                      SIDE = 'L'     SIDE = 'R' */
 /* >      TRANS = 'N':      Q * C          C * Q */
 /* >      TRANS = 'T':      Q**T * C       C * Q**T */
 /* > */
 /* > where Q is a real orthogonal matrix defined as the product */
 /* > of blocked elementary reflectors computed by tall skinny */
 /* > QR factorization (DGEQR) */
 /* > */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] SIDE */
 /* > \verbatim */
 /* >          SIDE is CHARACTER*1 */
 /* >          = 'L': apply Q or Q**T from the Left; */
 /* >          = 'R': apply Q or Q**T from the Right. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          = 'N':  No transpose, apply Q; */
 /* >          = 'T':  Transpose, apply Q**T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >=0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix C. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] K */
 /* > \verbatim */
 /* >          K is INTEGER */
 /* >          The number of elementary reflectors whose product defines */
 /* >          the matrix Q. */
 /* >          If SIDE = 'L', M >= K >= 0; */
 /* >          if SIDE = 'R', N >= K >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,K) */
 /* >          Part of the data structure to represent Q as returned by DGEQR. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. */
 /* >          If SIDE = 'L', LDA >= f2cmax(1,M); */
 /* >          if SIDE = 'R', LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] T */
 /* > \verbatim */
 /* >          T is DOUBLE PRECISION array, dimension (MAX(5,TSIZE)). */
 /* >          Part of the data structure to represent Q as returned by DGEQR. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TSIZE */
 /* > \verbatim */
 /* >          TSIZE is INTEGER */
 /* >          The dimension of the array T. TSIZE >= 5. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] C */
 /* > \verbatim */
 /* >          C is DOUBLE PRECISION array, dimension (LDC,N) */
 /* >          On entry, the M-by-N matrix C. */
 /* >          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDC */
 /* > \verbatim */
 /* >          LDC is INTEGER */
 /* >          The leading dimension of the array C. LDC >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >         (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If LWORK = -1, then a workspace query is assumed. The routine */
 /* >          only calculates the size of the WORK array, returns this */
 /* >          value as WORK(1), and no error message related to WORK */
 /* >          is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \par Further Details */
 /*  ==================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* > These details are particular for this LAPACK implementation. Users should not */
 /* > take them for granted. These details may change in the future, and are not likely */
 /* > true for another LAPACK implementation. These details are relevant if one wants */
 /* > to try to understand the code. They are not part of the interface. */
 /* > */
 /* > In this version, */
 /* > */
 /* >          T(2): row block size (MB) */
 /* >          T(3): column block size (NB) */
 /* >          T(6:TSIZE): data structure needed for Q, computed by */
 /* >                           DLATSQR or DGEQRT */
 /* > */
 /* >  Depending on the matrix dimensions M and N, and row and column */
 /* >  block sizes MB and NB returned by ILAENV, DGEQR will use either */
 /* >  DLATSQR (if the matrix is tall-and-skinny) or DGEQRT to compute */
 /* >  the QR factorization. */
 /* >  This version of DGEMQR will use either DLAMTSQR or DGEMQRT to */
 /* >  multiply matrix Q by another matrix. */
 /* >  Further Details in DLATMSQR or DGEMQRT. */
 /* > */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgemqr_(char *side, char *trans, integer *m, integer *n, 
 	integer *k, doublereal *a, integer *lda, doublereal *t, integer *
 	tsize, doublereal *c__, integer *ldc, doublereal *work, integer *
 	lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, c_dim1, c_offset, i__1;
    /* Local variables */
    logical left, tran;
    extern /* Subroutine */ int dlamtsqr_(char *, char *, integer *, integer *
 	    , integer *, integer *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *, integer *);
    extern logical lsame_(char *, char *);
    logical right;
    integer mb, nb, mn, lw, nblcks;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    logical notran, lquery;
    extern /* Subroutine */ int dgemqrt_(char *, char *, integer *, integer *,
 	     integer *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *, doublereal *, integer *, doublereal *, integer *);
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /* ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --t;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    --work;
    /* Function Body */
    lquery = *lwork == -1;
    notran = lsame_(trans, "N");
    tran = lsame_(trans, "T");
    left = lsame_(side, "L");
    right = lsame_(side, "R");
    mb = (integer) t[2];
    nb = (integer) t[3];
    if (left) {
 	lw = *n * nb;
 	mn = *m;
    } else {
 	lw = mb * nb;
 	mn = *n;
    }
    if (mb > *k && mn > *k) {
 	if ((mn - *k) % (mb - *k) == 0) {
 	    nblcks = (mn - *k) / (mb - *k);
 	} else {
 	    nblcks = (mn - *k) / (mb - *k) + 1;
 	}
    } else {
 	nblcks = 1;
    }
    *info = 0;
    if (! left && ! right) {
 	*info = -1;
    } else if (! tran && ! notran) {
 	*info = -2;
    } else if (*m < 0) {
 	*info = -3;
    } else if (*n < 0) {
 	*info = -4;
    } else if (*k < 0 || *k > mn) {
 	*info = -5;
    } else if (*lda < f2cmax(1,mn)) {
 	*info = -7;
    } else if (*tsize < 5) {
 	*info = -9;
    } else if (*ldc < f2cmax(1,*m)) {
 	*info = -11;
    } else if (*lwork < f2cmax(1,lw) && ! lquery) {
 	*info = -13;
    }
    if (*info == 0) {
 	work[1] = (doublereal) lw;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEMQR", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }
 /*     Quick return if possible */
 /* Computing MIN */
    i__1 = f2cmin(*m,*n);
    if (f2cmin(i__1,*k) == 0) {
 	return 0;
    }
 /* Computing MAX */
    i__1 = f2cmax(*m,*n);
    if (left && *m <= *k || right && *n <= *k || mb <= *k || mb >= f2cmax(i__1,*
 	    k)) {
 	dgemqrt_(side, trans, m, n, k, &nb, &a[a_offset], lda, &t[6], &nb, &
 		c__[c_offset], ldc, &work[1], info);
    } else {
 	dlamtsqr_(side, trans, m, n, k, &mb, &nb, &a[a_offset], lda, &t[6], &
 		nb, &c__[c_offset], ldc, &work[1], lwork, info);
    }
    work[1] = (doublereal) lw;
    return 0;
 /*     End of DGEMQR */
 } /* dgemqr_ */
--- a/lapack-netlib/SRC/dgemqrt.c
+++ b/lapack-netlib/SRC/dgemqrt.c
@@ -0,0 +1,708 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b DGEMQRT */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEMQRT + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgemqrt
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgemqrt
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgemqrt
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEMQRT( SIDE, TRANS, M, N, K, NB, V, LDV, T, LDT, */
 /*                          C, LDC, WORK, INFO ) */
 /*       CHARACTER SIDE, TRANS */
 /*       INTEGER   INFO, K, LDV, LDC, M, N, NB, LDT */
 /*       DOUBLE PRECISION V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEMQRT overwrites the general real M-by-N matrix C with */
 /* > */
 /* >                 SIDE = 'L'     SIDE = 'R' */
 /* > TRANS = 'N':      Q C            C Q */
 /* > TRANS = 'T':   Q**T C            C Q**T */
 /* > */
 /* > where Q is a real orthogonal matrix defined as the product of K */
 /* > elementary reflectors: */
 /* > */
 /* >       Q = H(1) H(2) . . . H(K) = I - V T V**T */
 /* > */
 /* > generated using the compact WY representation as returned by DGEQRT. */
 /* > */
 /* > Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] SIDE */
 /* > \verbatim */
 /* >          SIDE is CHARACTER*1 */
 /* >          = 'L': apply Q or Q**T from the Left; */
 /* >          = 'R': apply Q or Q**T from the Right. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          = 'N':  No transpose, apply Q; */
 /* >          = 'C':  Transpose, apply Q**T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix C. M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix C. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] K */
 /* > \verbatim */
 /* >          K is INTEGER */
 /* >          The number of elementary reflectors whose product defines */
 /* >          the matrix Q. */
 /* >          If SIDE = 'L', M >= K >= 0; */
 /* >          if SIDE = 'R', N >= K >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NB */
 /* > \verbatim */
 /* >          NB is INTEGER */
 /* >          The block size used for the storage of T.  K >= NB >= 1. */
 /* >          This must be the same value of NB used to generate T */
 /* >          in CGEQRT. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] V */
 /* > \verbatim */
 /* >          V is DOUBLE PRECISION array, dimension (LDV,K) */
 /* >          The i-th column must contain the vector which defines the */
 /* >          elementary reflector H(i), for i = 1,2,...,k, as returned by */
 /* >          CGEQRT in the first K columns of its array argument A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDV */
 /* > \verbatim */
 /* >          LDV is INTEGER */
 /* >          The leading dimension of the array V. */
 /* >          If SIDE = 'L', LDA >= f2cmax(1,M); */
 /* >          if SIDE = 'R', LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] T */
 /* > \verbatim */
 /* >          T is DOUBLE PRECISION array, dimension (LDT,K) */
 /* >          The upper triangular factors of the block reflectors */
 /* >          as returned by CGEQRT, stored as a NB-by-N matrix. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= NB. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] C */
 /* > \verbatim */
 /* >          C is DOUBLE PRECISION array, dimension (LDC,N) */
 /* >          On entry, the M-by-N matrix C. */
 /* >          On exit, C is overwritten by Q C, Q**T C, C Q**T or C Q. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDC */
 /* > \verbatim */
 /* >          LDC is INTEGER */
 /* >          The leading dimension of the array C. LDC >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array. The dimension of */
 /* >          WORK is N*NB if SIDE = 'L', or  M*NB if SIDE = 'R'. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgemqrt_(char *side, char *trans, integer *m, integer *n,
 	 integer *k, integer *nb, doublereal *v, integer *ldv, doublereal *t, 
 	integer *ldt, doublereal *c__, integer *ldc, doublereal *work, 
 	integer *info)
 {
    /* System generated locals */
    integer v_dim1, v_offset, c_dim1, c_offset, t_dim1, t_offset, i__1, i__2, 
 	    i__3, i__4;
    /* Local variables */
    logical left, tran;
    integer i__, q;
    extern logical lsame_(char *, char *);
    logical right;
    integer ib, kf;
    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *), xerbla_(char *, integer *, ftnlen);
    logical notran;
    integer ldwork;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
    /* Parameter adjustments */
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1 * 1;
    v -= v_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    --work;
    /* Function Body */
    *info = 0;
    left = lsame_(side, "L");
    right = lsame_(side, "R");
    tran = lsame_(trans, "T");
    notran = lsame_(trans, "N");
    if (left) {
 	ldwork = f2cmax(1,*n);
 	q = *m;
    } else if (right) {
 	ldwork = f2cmax(1,*m);
 	q = *n;
    }
    if (! left && ! right) {
 	*info = -1;
    } else if (! tran && ! notran) {
 	*info = -2;
    } else if (*m < 0) {
 	*info = -3;
    } else if (*n < 0) {
 	*info = -4;
    } else if (*k < 0 || *k > q) {
 	*info = -5;
    } else if (*nb < 1 || *nb > *k && *k > 0) {
 	*info = -6;
    } else if (*ldv < f2cmax(1,q)) {
 	*info = -8;
    } else if (*ldt < *nb) {
 	*info = -10;
    } else if (*ldc < f2cmax(1,*m)) {
 	*info = -12;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEMQRT", &i__1, (ftnlen)7);
 	return 0;
    }
    if (*m == 0 || *n == 0 || *k == 0) {
 	return 0;
    }
    if (left && tran) {
 	i__1 = *k;
 	i__2 = *nb;
 	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
 /* Computing MIN */
 	    i__3 = *nb, i__4 = *k - i__ + 1;
 	    ib = f2cmin(i__3,i__4);
 	    i__3 = *m - i__ + 1;
 	    dlarfb_("L", "T", "F", "C", &i__3, n, &ib, &v[i__ + i__ * v_dim1],
 		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, 
 		    &work[1], &ldwork);
 	}
    } else if (right && notran) {
 	i__2 = *k;
 	i__1 = *nb;
 	for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
 /* Computing MIN */
 	    i__3 = *nb, i__4 = *k - i__ + 1;
 	    ib = f2cmin(i__3,i__4);
 	    i__3 = *n - i__ + 1;
 	    dlarfb_("R", "N", "F", "C", m, &i__3, &ib, &v[i__ + i__ * v_dim1],
 		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], 
 		    ldc, &work[1], &ldwork);
 	}
    } else if (left && notran) {
 	kf = (*k - 1) / *nb * *nb + 1;
 	i__1 = -(*nb);
 	for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
 /* Computing MIN */
 	    i__2 = *nb, i__3 = *k - i__ + 1;
 	    ib = f2cmin(i__2,i__3);
 	    i__2 = *m - i__ + 1;
 	    dlarfb_("L", "N", "F", "C", &i__2, n, &ib, &v[i__ + i__ * v_dim1],
 		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, 
 		    &work[1], &ldwork);
 	}
    } else if (right && tran) {
 	kf = (*k - 1) / *nb * *nb + 1;
 	i__1 = -(*nb);
 	for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
 /* Computing MIN */
 	    i__2 = *nb, i__3 = *k - i__ + 1;
 	    ib = f2cmin(i__2,i__3);
 	    i__2 = *n - i__ + 1;
 	    dlarfb_("R", "T", "F", "C", m, &i__2, &ib, &v[i__ + i__ * v_dim1],
 		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], 
 		    ldc, &work[1], &ldwork);
 	}
    }
    return 0;
 /*     End of DGEMQRT */
 } /* dgemqrt_ */
--- a/lapack-netlib/SRC/dgeql2.c
+++ b/lapack-netlib/SRC/dgeql2.c
@@ -0,0 +1,591 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 /* > \brief \b DGEQL2 computes the QL factorization of a general rectangular matrix using an unblocked algorit
 hm. */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEQL2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeql2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeql2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeql2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEQL2( M, N, A, LDA, TAU, WORK, INFO ) */
 /*       INTEGER            INFO, LDA, M, N */
 /*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEQL2 computes a QL factorization of a real m by n matrix A: */
 /* > A = Q * L. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the m by n matrix A. */
 /* >          On exit, if m >= n, the lower triangle of the subarray */
 /* >          A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; */
 /* >          if m <= n, the elements on and below the (n-m)-th */
 /* >          superdiagonal contain the m by n lower trapezoidal matrix L; */
 /* >          the remaining elements, with the array TAU, represent the */
 /* >          orthogonal matrix Q as a product of elementary reflectors */
 /* >          (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in */
 /* >  A(1:m-k+i-1,n-k+i), and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgeql2_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *tau, doublereal *work, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    /* Local variables */
    integer i__, k;
    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
 	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
 	    doublereal *), dlarfg_(integer *, doublereal *, 
 	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen);
    doublereal aii;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEQL2", &i__1, (ftnlen)6);
 	return 0;
    }
    k = f2cmin(*m,*n);
    for (i__ = k; i__ >= 1; --i__) {
 /*        Generate elementary reflector H(i) to annihilate */
 /*        A(1:m-k+i-1,n-k+i) */
 	i__1 = *m - k + i__;
 	dlarfg_(&i__1, &a[*m - k + i__ + (*n - k + i__) * a_dim1], &a[(*n - k 
 		+ i__) * a_dim1 + 1], &c__1, &tau[i__]);
 /*        Apply H(i) to A(1:m-k+i,1:n-k+i-1) from the left */
 	aii = a[*m - k + i__ + (*n - k + i__) * a_dim1];
 	a[*m - k + i__ + (*n - k + i__) * a_dim1] = 1.;
 	i__1 = *m - k + i__;
 	i__2 = *n - k + i__ - 1;
 	dlarf_("Left", &i__1, &i__2, &a[(*n - k + i__) * a_dim1 + 1], &c__1, &
 		tau[i__], &a[a_offset], lda, &work[1]);
 	a[*m - k + i__ + (*n - k + i__) * a_dim1] = aii;
 /* L10: */
    }
    return 0;
 /*     End of DGEQL2 */
 } /* dgeql2_ */
--- a/lapack-netlib/SRC/dgeqlf.c
+++ b/lapack-netlib/SRC/dgeqlf.c
@@ -0,0 +1,711 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;
 /* > \brief \b DGEQLF */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEQLF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqlf.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqlf.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqlf.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */
 /*       INTEGER            INFO, LDA, LWORK, M, N */
 /*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEQLF computes a QL factorization of a real M-by-N matrix A: */
 /* > A = Q * L. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, */
 /* >          if m >= n, the lower triangle of the subarray */
 /* >          A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; */
 /* >          if m <= n, the elements on and below the (n-m)-th */
 /* >          superdiagonal contain the M-by-N lower trapezoidal matrix L; */
 /* >          the remaining elements, with the array TAU, represent the */
 /* >          orthogonal matrix Q as a product of elementary reflectors */
 /* >          (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK.  LWORK >= f2cmax(1,N). */
 /* >          For optimum performance LWORK >= N*NB, where NB is the */
 /* >          optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in */
 /* >  A(1:m-k+i-1,n-k+i), and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgeqlf_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *tau, doublereal *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    integer i__, k, nbmin, iinfo;
    extern /* Subroutine */ int dgeql2_(integer *, integer *, doublereal *, 
 	    integer *, doublereal *, doublereal *, integer *);
    integer ib, nb, ki, kk;
    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *);
    integer mu, nu, nx;
    extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, 
 	    doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer ldwork, lwkopt;
    logical lquery;
    integer iws;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;
    /* Function Body */
    *info = 0;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info == 0) {
 	k = f2cmin(*m,*n);
 	if (k == 0) {
 	    lwkopt = 1;
 	} else {
 	    nb = ilaenv_(&c__1, "DGEQLF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
 		    (ftnlen)1);
 	    lwkopt = *n * nb;
 	}
 	work[1] = (doublereal) lwkopt;
 	if (*lwork < f2cmax(1,*n) && ! lquery) {
 	    *info = -7;
 	}
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEQLF", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }
 /*     Quick return if possible */
    if (k == 0) {
 	return 0;
    }
    nbmin = 2;
    nx = 1;
    iws = *n;
    if (nb > 1 && nb < k) {
 /*        Determine when to cross over from blocked to unblocked code. */
 /* Computing MAX */
 	i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQLF", " ", m, n, &c_n1, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);
 	if (nx < k) {
 /*           Determine if workspace is large enough for blocked code. */
 	    ldwork = *n;
 	    iws = ldwork * nb;
 	    if (*lwork < iws) {
 /*              Not enough workspace to use optimal NB:  reduce NB and */
 /*              determine the minimum value of NB. */
 		nb = *lwork / ldwork;
 /* Computing MAX */
 		i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQLF", " ", m, n, &c_n1, &
 			c_n1, (ftnlen)6, (ftnlen)1);
 		nbmin = f2cmax(i__1,i__2);
 	    }
 	}
    }
    if (nb >= nbmin && nb < k && nx < k) {
 /*        Use blocked code initially. */
 /*        The last kk columns are handled by the block method. */
 	ki = (k - nx - 1) / nb * nb;
 /* Computing MIN */
 	i__1 = k, i__2 = ki + nb;
 	kk = f2cmin(i__1,i__2);
 	i__1 = k - kk + 1;
 	i__2 = -nb;
 	for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ 
 		+= i__2) {
 /* Computing MIN */
 	    i__3 = k - i__ + 1;
 	    ib = f2cmin(i__3,nb);
 /*           Compute the QL factorization of the current block */
 /*           A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) */
 	    i__3 = *m - k + i__ + ib - 1;
 	    dgeql2_(&i__3, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, &tau[
 		    i__], &work[1], &iinfo);
 	    if (*n - k + i__ > 1) {
 /*              Form the triangular factor of the block reflector */
 /*              H = H(i+ib-1) . . . H(i+1) H(i) */
 		i__3 = *m - k + i__ + ib - 1;
 		dlarft_("Backward", "Columnwise", &i__3, &ib, &a[(*n - k + 
 			i__) * a_dim1 + 1], lda, &tau[i__], &work[1], &ldwork);
 /*              Apply H**T to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */
 		i__3 = *m - k + i__ + ib - 1;
 		i__4 = *n - k + i__ - 1;
 		dlarfb_("Left", "Transpose", "Backward", "Columnwise", &i__3, 
 			&i__4, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, &
 			work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], &
 			ldwork);
 	    }
 /* L10: */
 	}
 	mu = *m - k + i__ + nb - 1;
 	nu = *n - k + i__ + nb - 1;
    } else {
 	mu = *m;
 	nu = *n;
    }
 /*     Use unblocked code to factor the last or only block */
    if (mu > 0 && nu > 0) {
 	dgeql2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
    }
    work[1] = (doublereal) iws;
    return 0;
 /*     End of DGEQLF */
 } /* dgeqlf_ */
--- a/lapack-netlib/SRC/dgeqp3.c
+++ b/lapack-netlib/SRC/dgeqp3.c
@@ -0,0 +1,795 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;
 /* > \brief \b DGEQP3 */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEQP3 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqp3.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqp3.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqp3.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ) */
 /*       INTEGER            INFO, LDA, LWORK, M, N */
 /*       INTEGER            JPVT( * ) */
 /*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEQP3 computes a QR factorization with column pivoting of a */
 /* > matrix A:  A*P = Q*R  using Level 3 BLAS. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A. M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the upper triangle of the array contains the */
 /* >          f2cmin(M,N)-by-N upper trapezoidal matrix R; the elements below */
 /* >          the diagonal, together with the array TAU, represent the */
 /* >          orthogonal matrix Q as a product of f2cmin(M,N) elementary */
 /* >          reflectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] JPVT */
 /* > \verbatim */
 /* >          JPVT is INTEGER array, dimension (N) */
 /* >          On entry, if JPVT(J).ne.0, the J-th column of A is permuted */
 /* >          to the front of A*P (a leading column); if JPVT(J)=0, */
 /* >          the J-th column of A is a free column. */
 /* >          On exit, if JPVT(J)=K, then the J-th column of A*P was the */
 /* >          the K-th column of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO=0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. LWORK >= 3*N+1. */
 /* >          For optimal performance LWORK >= 2*N+( N+1 )*NB, where NB */
 /* >          is the optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit. */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real/complex vector */
 /* >  with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in */
 /* >  A(i+1:m,i), and tau in TAU(i). */
 /* > \endverbatim */
 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */
 /* >    X. Sun, Computer Science Dept., Duke University, USA */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgeqp3_(integer *m, integer *n, doublereal *a, integer *
 	lda, integer *jpvt, doublereal *tau, doublereal *work, integer *lwork,
 	 integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    /* Local variables */
    integer nfxd;
    extern doublereal dnrm2_(integer *, doublereal *, integer *);
    integer j, nbmin, minmn;
    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, 
 	    doublereal *, integer *);
    integer minws;
    extern /* Subroutine */ int dlaqp2_(integer *, integer *, integer *, 
 	    doublereal *, integer *, integer *, doublereal *, doublereal *, 
 	    doublereal *, doublereal *);
    integer jb, na, nb, sm, sn, nx;
    extern /* Subroutine */ int dgeqrf_(integer *, integer *, doublereal *, 
 	    integer *, doublereal *, doublereal *, integer *, integer *), 
 	    xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int dlaqps_(integer *, integer *, integer *, 
 	    integer *, integer *, doublereal *, integer *, integer *, 
 	    doublereal *, doublereal *, doublereal *, doublereal *, 
 	    doublereal *, integer *);
    integer topbmn, sminmn;
    extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, 
 	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
 	    integer *, doublereal *, integer *, integer *);
    integer lwkopt;
    logical lquery;
    integer fjb, iws;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test input arguments */
 /*  ==================== */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --jpvt;
    --tau;
    --work;
    /* Function Body */
    *info = 0;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info == 0) {
 	minmn = f2cmin(*m,*n);
 	if (minmn == 0) {
 	    iws = 1;
 	    lwkopt = 1;
 	} else {
 	    iws = *n * 3 + 1;
 	    nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
 		    (ftnlen)1);
 	    lwkopt = (*n << 1) + (*n + 1) * nb;
 	}
 	work[1] = (doublereal) lwkopt;
 	if (*lwork < iws && ! lquery) {
 	    *info = -8;
 	}
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEQP3", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }
 /*     Move initial columns up front. */
    nfxd = 1;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 	if (jpvt[j] != 0) {
 	    if (j != nfxd) {
 		dswap_(m, &a[j * a_dim1 + 1], &c__1, &a[nfxd * a_dim1 + 1], &
 			c__1);
 		jpvt[j] = jpvt[nfxd];
 		jpvt[nfxd] = j;
 	    } else {
 		jpvt[j] = j;
 	    }
 	    ++nfxd;
 	} else {
 	    jpvt[j] = j;
 	}
 /* L10: */
    }
    --nfxd;
 /*     Factorize fixed columns */
 /*  ======================= */
 /*     Compute the QR factorization of fixed columns and update */
 /*     remaining columns. */
    if (nfxd > 0) {
 	na = f2cmin(*m,nfxd);
 /* CC      CALL DGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) */
 	dgeqrf_(m, &na, &a[a_offset], lda, &tau[1], &work[1], lwork, info);
 /* Computing MAX */
 	i__1 = iws, i__2 = (integer) work[1];
 	iws = f2cmax(i__1,i__2);
 	if (na < *n) {
 /* CC         CALL DORM2R( 'Left', 'Transpose', M, N-NA, NA, A, LDA, */
 /* CC  $                   TAU, A( 1, NA+1 ), LDA, WORK, INFO ) */
 	    i__1 = *n - na;
 	    dormqr_("Left", "Transpose", m, &i__1, &na, &a[a_offset], lda, &
 		    tau[1], &a[(na + 1) * a_dim1 + 1], lda, &work[1], lwork, 
 		    info);
 /* Computing MAX */
 	    i__1 = iws, i__2 = (integer) work[1];
 	    iws = f2cmax(i__1,i__2);
 	}
    }
 /*     Factorize free columns */
 /*  ====================== */
    if (nfxd < minmn) {
 	sm = *m - nfxd;
 	sn = *n - nfxd;
 	sminmn = minmn - nfxd;
 /*        Determine the block size. */
 	nb = ilaenv_(&c__1, "DGEQRF", " ", &sm, &sn, &c_n1, &c_n1, (ftnlen)6, 
 		(ftnlen)1);
 	nbmin = 2;
 	nx = 0;
 	if (nb > 1 && nb < sminmn) {
 /*           Determine when to cross over from blocked to unblocked code. */
 /* Computing MAX */
 	    i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", &sm, &sn, &c_n1, &
 		    c_n1, (ftnlen)6, (ftnlen)1);
 	    nx = f2cmax(i__1,i__2);
 	    if (nx < sminmn) {
 /*              Determine if workspace is large enough for blocked code. */
 		minws = (sn << 1) + (sn + 1) * nb;
 		iws = f2cmax(iws,minws);
 		if (*lwork < minws) {
 /*                 Not enough workspace to use optimal NB: Reduce NB and */
 /*                 determine the minimum value of NB. */
 		    nb = (*lwork - (sn << 1)) / (sn + 1);
 /* Computing MAX */
 		    i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", &sm, &sn, &
 			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
 		    nbmin = f2cmax(i__1,i__2);
 		}
 	    }
 	}
 /*        Initialize partial column norms. The first N elements of work */
 /*        store the exact column norms. */
 	i__1 = *n;
 	for (j = nfxd + 1; j <= i__1; ++j) {
 	    work[j] = dnrm2_(&sm, &a[nfxd + 1 + j * a_dim1], &c__1);
 	    work[*n + j] = work[j];
 /* L20: */
 	}
 	if (nb >= nbmin && nb < sminmn && nx < sminmn) {
 /*           Use blocked code initially. */
 	    j = nfxd + 1;
 /*           Compute factorization: while loop. */
 	    topbmn = minmn - nx;
 L30:
 	    if (j <= topbmn) {
 /* Computing MIN */
 		i__1 = nb, i__2 = topbmn - j + 1;
 		jb = f2cmin(i__1,i__2);
 /*              Factorize JB columns among columns J:N. */
 		i__1 = *n - j + 1;
 		i__2 = j - 1;
 		i__3 = *n - j + 1;
 		dlaqps_(m, &i__1, &i__2, &jb, &fjb, &a[j * a_dim1 + 1], lda, &
 			jpvt[j], &tau[j], &work[j], &work[*n + j], &work[(*n 
 			<< 1) + 1], &work[(*n << 1) + jb + 1], &i__3);
 		j += fjb;
 		goto L30;
 	    }
 	} else {
 	    j = nfxd + 1;
 	}
 /*        Use unblocked code to factor the last or only block. */
 	if (j <= minmn) {
 	    i__1 = *n - j + 1;
 	    i__2 = j - 1;
 	    dlaqp2_(m, &i__1, &i__2, &a[j * a_dim1 + 1], lda, &jpvt[j], &tau[
 		    j], &work[j], &work[*n + j], &work[(*n << 1) + 1]);
 	}
    }
    work[1] = (doublereal) iws;
    return 0;
 /*     End of DGEQP3 */
 } /* dgeqp3_ */
--- a/lapack-netlib/SRC/dgeqr.c
+++ b/lapack-netlib/SRC/dgeqr.c
@@ -0,0 +1,735 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__2 = 2;
 /* > \brief \b DGEQR */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEQR( M, N, A, LDA, T, TSIZE, WORK, LWORK, */
 /*                         INFO ) */
 /*       INTEGER           INFO, LDA, M, N, TSIZE, LWORK */
 /*       DOUBLE PRECISION  A( LDA, * ), T( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEQR computes a QR factorization of a real M-by-N matrix A: */
 /* > */
 /* >    A = Q * ( R ), */
 /* >            ( 0 ) */
 /* > */
 /* > where: */
 /* > */
 /* >    Q is a M-by-M orthogonal matrix; */
 /* >    R is an upper-triangular N-by-N matrix; */
 /* >    0 is a (M-N)-by-N zero matrix, if M > N. */
 /* > */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the f2cmin(M,N)-by-N upper trapezoidal matrix R */
 /* >          (R is upper triangular if M >= N); */
 /* >          the elements below the diagonal are used to store part of the */
 /* >          data structure to represent Q. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is DOUBLE PRECISION array, dimension (MAX(5,TSIZE)) */
 /* >          On exit, if INFO = 0, T(1) returns optimal (or either minimal */
 /* >          or optimal, if query is assumed) TSIZE. See TSIZE for details. */
 /* >          Remaining T contains part of the data structure used to represent Q. */
 /* >          If one wants to apply or construct Q, then one needs to keep T */
 /* >          (in addition to A) and pass it to further subroutines. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TSIZE */
 /* > \verbatim */
 /* >          TSIZE is INTEGER */
 /* >          If TSIZE >= 5, the dimension of the array T. */
 /* >          If TSIZE = -1 or -2, then a workspace query is assumed. The routine */
 /* >          only calculates the sizes of the T and WORK arrays, returns these */
 /* >          values as the first entries of the T and WORK arrays, and no error */
 /* >          message related to T or WORK is issued by XERBLA. */
 /* >          If TSIZE = -1, the routine calculates optimal size of T for the */
 /* >          optimum performance and returns this value in T(1). */
 /* >          If TSIZE = -2, the routine calculates minimal size of T and */
 /* >          returns this value in T(1). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */
 /* >          or optimal, if query was assumed) LWORK. */
 /* >          See LWORK for details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If LWORK = -1 or -2, then a workspace query is assumed. The routine */
 /* >          only calculates the sizes of the T and WORK arrays, returns these */
 /* >          values as the first entries of the T and WORK arrays, and no error */
 /* >          message related to T or WORK is issued by XERBLA. */
 /* >          If LWORK = -1, the routine calculates optimal size of WORK for the */
 /* >          optimal performance and returns this value in WORK(1). */
 /* >          If LWORK = -2, the routine calculates minimal size of WORK and */
 /* >          returns this value in WORK(1). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \par Further Details */
 /*  ==================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* > The goal of the interface is to give maximum freedom to the developers for */
 /* > creating any QR factorization algorithm they wish. The triangular */
 /* > (trapezoidal) R has to be stored in the upper part of A. The lower part of A */
 /* > and the array T can be used to store any relevant information for applying or */
 /* > constructing the Q factor. The WORK array can safely be discarded after exit. */
 /* > */
 /* > Caution: One should not expect the sizes of T and WORK to be the same from one */
 /* > LAPACK implementation to the other, or even from one execution to the other. */
 /* > A workspace query (for T and WORK) is needed at each execution. However, */
 /* > for a given execution, the size of T and WORK are fixed and will not change */
 /* > from one query to the next. */
 /* > */
 /* > \endverbatim */
 /* > */
 /* > \par Further Details particular to this LAPACK implementation: */
 /*  ============================================================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* > These details are particular for this LAPACK implementation. Users should not */
 /* > take them for granted. These details may change in the future, and are not likely */
 /* > true for another LAPACK implementation. These details are relevant if one wants */
 /* > to try to understand the code. They are not part of the interface. */
 /* > */
 /* > In this version, */
 /* > */
 /* >          T(2): row block size (MB) */
 /* >          T(3): column block size (NB) */
 /* >          T(6:TSIZE): data structure needed for Q, computed by */
 /* >                           DLATSQR or DGEQRT */
 /* > */
 /* >  Depending on the matrix dimensions M and N, and row and column */
 /* >  block sizes MB and NB returned by ILAENV, DGEQR will use either */
 /* >  DLATSQR (if the matrix is tall-and-skinny) or DGEQRT to compute */
 /* >  the QR factorization. */
 /* > */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgeqr_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *t, integer *tsize, doublereal *work, integer *lwork, 
 	integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    /* Local variables */
    logical mint, minw;
    integer mb, nb, nblcks;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int dgeqrt_(integer *, integer *, integer *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *);
    logical lminws, lquery;
    integer mintsz;
    extern /* Subroutine */ int dlatsqr_(integer *, integer *, integer *, 
 	    integer *, doublereal *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *, integer *);
 /*  -- LAPACK computational routine (version 3.9.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
 /*     November 2019 */
 /*  ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --t;
    --work;
    /* Function Body */
    *info = 0;
    lquery = *tsize == -1 || *tsize == -2 || *lwork == -1 || *lwork == -2;
    mint = FALSE_;
    minw = FALSE_;
    if (*tsize == -2 || *lwork == -2) {
 	if (*tsize != -1) {
 	    mint = TRUE_;
 	}
 	if (*lwork != -1) {
 	    minw = TRUE_;
 	}
    }
 /*     Determine the block size */
    if (f2cmin(*m,*n) > 0) {
 	mb = ilaenv_(&c__1, "DGEQR ", " ", m, n, &c__1, &c_n1, (ftnlen)6, (
 		ftnlen)1);
 	nb = ilaenv_(&c__1, "DGEQR ", " ", m, n, &c__2, &c_n1, (ftnlen)6, (
 		ftnlen)1);
    } else {
 	mb = *m;
 	nb = 1;
    }
    if (mb > *m || mb <= *n) {
 	mb = *m;
    }
    if (nb > f2cmin(*m,*n) || nb < 1) {
 	nb = 1;
    }
    mintsz = *n + 5;
    if (mb > *n && *m > *n) {
 	if ((*m - *n) % (mb - *n) == 0) {
 	    nblcks = (*m - *n) / (mb - *n);
 	} else {
 	    nblcks = (*m - *n) / (mb - *n) + 1;
 	}
    } else {
 	nblcks = 1;
    }
 /*     Determine if the workspace size satisfies minimal size */
    lminws = FALSE_;
 /* Computing MAX */
    i__1 = 1, i__2 = nb * *n * nblcks + 5;
    if ((*tsize < f2cmax(i__1,i__2) || *lwork < nb * *n) && *lwork >= *n && *
 	    tsize >= mintsz && ! lquery) {
 /* Computing MAX */
 	i__1 = 1, i__2 = nb * *n * nblcks + 5;
 	if (*tsize < f2cmax(i__1,i__2)) {
 	    lminws = TRUE_;
 	    nb = 1;
 	    mb = *m;
 	}
 	if (*lwork < nb * *n) {
 	    lminws = TRUE_;
 	    nb = 1;
 	}
    }
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = 1, i__2 = nb * *n * nblcks + 5;
 	if (*tsize < f2cmax(i__1,i__2) && ! lquery && ! lminws) {
 	    *info = -6;
 	} else /* if(complicated condition) */ {
 /* Computing MAX */
 	    i__1 = 1, i__2 = *n * nb;
 	    if (*lwork < f2cmax(i__1,i__2) && ! lquery && ! lminws) {
 		*info = -8;
 	    }
 	}
    }
    if (*info == 0) {
 	if (mint) {
 	    t[1] = (doublereal) mintsz;
 	} else {
 	    t[1] = (doublereal) (nb * *n * nblcks + 5);
 	}
 	t[2] = (doublereal) mb;
 	t[3] = (doublereal) nb;
 	if (minw) {
 	    work[1] = (doublereal) f2cmax(1,*n);
 	} else {
 /* Computing MAX */
 	    i__1 = 1, i__2 = nb * *n;
 	    work[1] = (doublereal) f2cmax(i__1,i__2);
 	}
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEQR", &i__1, (ftnlen)5);
 	return 0;
    } else if (lquery) {
 	return 0;
    }
 /*     Quick return if possible */
    if (f2cmin(*m,*n) == 0) {
 	return 0;
    }
 /*     The QR Decomposition */
    if (*m <= *n || mb <= *n || mb >= *m) {
 	dgeqrt_(m, n, &nb, &a[a_offset], lda, &t[6], &nb, &work[1], info);
    } else {
 	dlatsqr_(m, n, &mb, &nb, &a[a_offset], lda, &t[6], &nb, &work[1], 
 		lwork, info);
    }
 /* Computing MAX */
    i__1 = 1, i__2 = nb * *n;
    work[1] = (doublereal) f2cmax(i__1,i__2);
    return 0;
 /*     End of DGEQR */
 } /* dgeqr_ */
--- a/lapack-netlib/SRC/dgeqr2.c
+++ b/lapack-netlib/SRC/dgeqr2.c
@@ -0,0 +1,602 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 /* > \brief \b DGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorit
 hm. */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEQR2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqr2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqr2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqr2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO ) */
 /*       INTEGER            INFO, LDA, M, N */
 /*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEQR2 computes a QR factorization of a real m-by-n matrix A: */
 /* > */
 /* >    A = Q * ( R ), */
 /* >            ( 0 ) */
 /* > */
 /* > where: */
 /* > */
 /* >    Q is a m-by-m orthogonal matrix; */
 /* >    R is an upper-triangular n-by-n matrix; */
 /* >    0 is a (m-n)-by-n zero matrix, if m > n. */
 /* > */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the m by n matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the f2cmin(m,n) by n upper trapezoidal matrix R (R is */
 /* >          upper triangular if m >= n); the elements below the diagonal, */
 /* >          with the array TAU, represent the orthogonal matrix Q as a */
 /* >          product of elementary reflectors (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date November 2019 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
 /* >  and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *tau, doublereal *work, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    /* Local variables */
    integer i__, k;
    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
 	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
 	    doublereal *), dlarfg_(integer *, doublereal *, 
 	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen);
    doublereal aii;
 /*  -- LAPACK computational routine (version 3.9.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2019 */
 /*  ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEQR2", &i__1, (ftnlen)6);
 	return 0;
    }
    k = f2cmin(*m,*n);
    i__1 = k;
    for (i__ = 1; i__ <= i__1; ++i__) {
 /*        Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
 	i__2 = *m - i__ + 1;
 /* Computing MIN */
 	i__3 = i__ + 1;
 	dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * a_dim1]
 		, &c__1, &tau[i__]);
 	if (i__ < *n) {
 /*           Apply H(i) to A(i:m,i+1:n) from the left */
 	    aii = a[i__ + i__ * a_dim1];
 	    a[i__ + i__ * a_dim1] = 1.;
 	    i__2 = *m - i__ + 1;
 	    i__3 = *n - i__;
 	    dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[
 		    i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
 	    a[i__ + i__ * a_dim1] = aii;
 	}
 /* L10: */
    }
    return 0;
 /*     End of DGEQR2 */
 } /* dgeqr2_ */
--- a/lapack-netlib/SRC/dgeqr2p.c
+++ b/lapack-netlib/SRC/dgeqr2p.c
@@ -0,0 +1,607 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 /* > \brief \b DGEQR2P computes the QR factorization of a general rectangular matrix with non-negative diagona
 l elements using an unblocked algorithm. */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEQR2P + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqr2p
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqr2p
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqr2p
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) */
 /*       INTEGER            INFO, LDA, M, N */
 /*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEQR2P computes a QR factorization of a real m-by-n matrix A: */
 /* > */
 /* >    A = Q * ( R ), */
 /* >            ( 0 ) */
 /* > */
 /* > where: */
 /* > */
 /* >    Q is a m-by-m orthogonal matrix; */
 /* >    R is an upper-triangular n-by-n matrix with nonnegative diagonal */
 /* >    entries; */
 /* >    0 is a (m-n)-by-n zero matrix, if m > n. */
 /* > */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the m by n matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the f2cmin(m,n) by n upper trapezoidal matrix R (R is */
 /* >          upper triangular if m >= n). The diagonal entries of R are */
 /* >          nonnegative; the elements below the diagonal, */
 /* >          with the array TAU, represent the orthogonal matrix Q as a */
 /* >          product of elementary reflectors (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date November 2019 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
 /* >  and tau in TAU(i). */
 /* > */
 /* > See Lapack Working Note 203 for details */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgeqr2p_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *tau, doublereal *work, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    /* Local variables */
    integer i__, k;
    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
 	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
 	    doublereal *), xerbla_(char *, integer *, ftnlen);
    doublereal aii;
    extern /* Subroutine */ int dlarfgp_(integer *, doublereal *, doublereal *
 	    , integer *, doublereal *);
 /*  -- LAPACK computational routine (version 3.9.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2019 */
 /*  ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEQR2P", &i__1, (ftnlen)7);
 	return 0;
    }
    k = f2cmin(*m,*n);
    i__1 = k;
    for (i__ = 1; i__ <= i__1; ++i__) {
 /*        Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
 	i__2 = *m - i__ + 1;
 /* Computing MIN */
 	i__3 = i__ + 1;
 	dlarfgp_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * 
 		a_dim1], &c__1, &tau[i__]);
 	if (i__ < *n) {
 /*           Apply H(i) to A(i:m,i+1:n) from the left */
 	    aii = a[i__ + i__ * a_dim1];
 	    a[i__ + i__ * a_dim1] = 1.;
 	    i__2 = *m - i__ + 1;
 	    i__3 = *n - i__;
 	    dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[
 		    i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
 	    a[i__ + i__ * a_dim1] = aii;
 	}
 /* L10: */
    }
    return 0;
 /*     End of DGEQR2P */
 } /* dgeqr2p_ */
--- a/lapack-netlib/SRC/dgeqrf.c
+++ b/lapack-netlib/SRC/dgeqrf.c
@@ -0,0 +1,702 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;
 /* > \brief \b DGEQRF */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEQRF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqrf.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqrf.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqrf.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */
 /*       INTEGER            INFO, LDA, LWORK, M, N */
 /*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEQRF computes a QR factorization of a real M-by-N matrix A: */
 /* > */
 /* >    A = Q * ( R ), */
 /* >            ( 0 ) */
 /* > */
 /* > where: */
 /* > */
 /* >    Q is a M-by-M orthogonal matrix; */
 /* >    R is an upper-triangular N-by-N matrix; */
 /* >    0 is a (M-N)-by-N zero matrix, if M > N. */
 /* > */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the f2cmin(M,N)-by-N upper trapezoidal matrix R (R is */
 /* >          upper triangular if m >= n); the elements below the diagonal, */
 /* >          with the array TAU, represent the orthogonal matrix Q as a */
 /* >          product of f2cmin(m,n) elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK.  LWORK >= f2cmax(1,N). */
 /* >          For optimum performance LWORK >= N*NB, where NB is */
 /* >          the optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date November 2019 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
 /* >  and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgeqrf_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *tau, doublereal *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    integer i__, k, nbmin, iinfo;
    extern /* Subroutine */ int dgeqr2_(integer *, integer *, doublereal *, 
 	    integer *, doublereal *, doublereal *, integer *);
    integer ib, nb;
    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *);
    integer nx;
    extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, 
 	    doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer ldwork, lwkopt;
    logical lquery;
    integer iws;
 /*  -- LAPACK computational routine (version 3.9.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2019 */
 /*  ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;
    /* Function Body */
    *info = 0;
    nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
 	    1);
    lwkopt = *n * nb;
    work[1] = (doublereal) lwkopt;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEQRF", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }
 /*     Quick return if possible */
    k = f2cmin(*m,*n);
    if (k == 0) {
 	work[1] = 1.;
 	return 0;
    }
    nbmin = 2;
    nx = 0;
    iws = *n;
    if (nb > 1 && nb < k) {
 /*        Determine when to cross over from blocked to unblocked code. */
 /* Computing MAX */
 	i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);
 	if (nx < k) {
 /*           Determine if workspace is large enough for blocked code. */
 	    ldwork = *n;
 	    iws = ldwork * nb;
 	    if (*lwork < iws) {
 /*              Not enough workspace to use optimal NB:  reduce NB and */
 /*              determine the minimum value of NB. */
 		nb = *lwork / ldwork;
 /* Computing MAX */
 		i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, &
 			c_n1, (ftnlen)6, (ftnlen)1);
 		nbmin = f2cmax(i__1,i__2);
 	    }
 	}
    }
    if (nb >= nbmin && nb < k && nx < k) {
 /*        Use blocked code initially */
 	i__1 = k - nx;
 	i__2 = nb;
 	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
 /* Computing MIN */
 	    i__3 = k - i__ + 1;
 	    ib = f2cmin(i__3,nb);
 /*           Compute the QR factorization of the current block */
 /*           A(i:m,i:i+ib-1) */
 	    i__3 = *m - i__ + 1;
 	    dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
 		    1], &iinfo);
 	    if (i__ + ib <= *n) {
 /*              Form the triangular factor of the block reflector */
 /*              H = H(i) H(i+1) . . . H(i+ib-1) */
 		i__3 = *m - i__ + 1;
 		dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * 
 			a_dim1], lda, &tau[i__], &work[1], &ldwork);
 /*              Apply H**T to A(i:m,i+ib:n) from the left */
 		i__3 = *m - i__ + 1;
 		i__4 = *n - i__ - ib + 1;
 		dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
 			i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
 			ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib 
 			+ 1], &ldwork);
 	    }
 /* L10: */
 	}
    } else {
 	i__ = 1;
    }
 /*     Use unblocked code to factor the last or only block. */
    if (i__ <= k) {
 	i__2 = *m - i__ + 1;
 	i__1 = *n - i__ + 1;
 	dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
 		, &iinfo);
    }
    work[1] = (doublereal) iws;
    return 0;
 /*     End of DGEQRF */
 } /* dgeqrf_ */
--- a/lapack-netlib/SRC/dgeqrfp.c
+++ b/lapack-netlib/SRC/dgeqrfp.c
@@ -0,0 +1,705 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;
 /* > \brief \b DGEQRFP */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEQRFP + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqrfp
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqrfp
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqrfp
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */
 /*       INTEGER            INFO, LDA, LWORK, M, N */
 /*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEQR2P computes a QR factorization of a real M-by-N matrix A: */
 /* > */
 /* >    A = Q * ( R ), */
 /* >            ( 0 ) */
 /* > */
 /* > where: */
 /* > */
 /* >    Q is a M-by-M orthogonal matrix; */
 /* >    R is an upper-triangular N-by-N matrix with nonnegative diagonal */
 /* >    entries; */
 /* >    0 is a (M-N)-by-N zero matrix, if M > N. */
 /* > */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the f2cmin(M,N)-by-N upper trapezoidal matrix R (R is */
 /* >          upper triangular if m >= n). The diagonal entries of R */
 /* >          are nonnegative; the elements below the diagonal, */
 /* >          with the array TAU, represent the orthogonal matrix Q as a */
 /* >          product of f2cmin(m,n) elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK.  LWORK >= f2cmax(1,N). */
 /* >          For optimum performance LWORK >= N*NB, where NB is */
 /* >          the optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date November 2019 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
 /* >  and tau in TAU(i). */
 /* > */
 /* > See Lapack Working Note 203 for details */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgeqrfp_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *tau, doublereal *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    integer i__, k, nbmin, iinfo, ib, nb;
    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *);
    integer nx;
    extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, 
 	    doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer ldwork, lwkopt;
    logical lquery;
    extern /* Subroutine */ int dgeqr2p_(integer *, integer *, doublereal *, 
 	    integer *, doublereal *, doublereal *, integer *);
    integer iws;
 /*  -- LAPACK computational routine (version 3.9.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2019 */
 /*  ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;
    /* Function Body */
    *info = 0;
    nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
 	    1);
    lwkopt = *n * nb;
    work[1] = (doublereal) lwkopt;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEQRFP", &i__1, (ftnlen)7);
 	return 0;
    } else if (lquery) {
 	return 0;
    }
 /*     Quick return if possible */
    k = f2cmin(*m,*n);
    if (k == 0) {
 	work[1] = 1.;
 	return 0;
    }
    nbmin = 2;
    nx = 0;
    iws = *n;
    if (nb > 1 && nb < k) {
 /*        Determine when to cross over from blocked to unblocked code. */
 /* Computing MAX */
 	i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);
 	if (nx < k) {
 /*           Determine if workspace is large enough for blocked code. */
 	    ldwork = *n;
 	    iws = ldwork * nb;
 	    if (*lwork < iws) {
 /*              Not enough workspace to use optimal NB:  reduce NB and */
 /*              determine the minimum value of NB. */
 		nb = *lwork / ldwork;
 /* Computing MAX */
 		i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, &
 			c_n1, (ftnlen)6, (ftnlen)1);
 		nbmin = f2cmax(i__1,i__2);
 	    }
 	}
    }
    if (nb >= nbmin && nb < k && nx < k) {
 /*        Use blocked code initially */
 	i__1 = k - nx;
 	i__2 = nb;
 	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
 /* Computing MIN */
 	    i__3 = k - i__ + 1;
 	    ib = f2cmin(i__3,nb);
 /*           Compute the QR factorization of the current block */
 /*           A(i:m,i:i+ib-1) */
 	    i__3 = *m - i__ + 1;
 	    dgeqr2p_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &
 		    work[1], &iinfo);
 	    if (i__ + ib <= *n) {
 /*              Form the triangular factor of the block reflector */
 /*              H = H(i) H(i+1) . . . H(i+ib-1) */
 		i__3 = *m - i__ + 1;
 		dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * 
 			a_dim1], lda, &tau[i__], &work[1], &ldwork);
 /*              Apply H**T to A(i:m,i+ib:n) from the left */
 		i__3 = *m - i__ + 1;
 		i__4 = *n - i__ - ib + 1;
 		dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
 			i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
 			ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib 
 			+ 1], &ldwork);
 	    }
 /* L10: */
 	}
    } else {
 	i__ = 1;
    }
 /*     Use unblocked code to factor the last or only block. */
    if (i__ <= k) {
 	i__2 = *m - i__ + 1;
 	i__1 = *n - i__ + 1;
 	dgeqr2p_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
 		1], &iinfo);
    }
    work[1] = (doublereal) iws;
    return 0;
 /*     End of DGEQRFP */
 } /* dgeqrfp_ */
--- a/lapack-netlib/SRC/dgeqrt.c
+++ b/lapack-netlib/SRC/dgeqrt.c
@@ -0,0 +1,631 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b DGEQRT */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEQRT + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqrt.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqrt.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqrt.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) */
 /*       INTEGER INFO, LDA, LDT, M, N, NB */
 /*       DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEQRT computes a blocked QR factorization of a real M-by-N matrix A */
 /* > using the compact WY representation of Q. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NB */
 /* > \verbatim */
 /* >          NB is INTEGER */
 /* >          The block size to be used in the blocked QR.  MIN(M,N) >= NB >= 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the f2cmin(M,N)-by-N upper trapezoidal matrix R (R is */
 /* >          upper triangular if M >= N); the elements below the diagonal */
 /* >          are the columns of V. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is DOUBLE PRECISION array, dimension (LDT,MIN(M,N)) */
 /* >          The upper triangular block reflectors stored in compact form */
 /* >          as a sequence of upper triangular blocks.  See below */
 /* >          for further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= NB. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (NB*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date June 2017 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix V stores the elementary reflectors H(i) in the i-th column */
 /* >  below the diagonal. For example, if M=5 and N=3, the matrix V is */
 /* > */
 /* >               V = (  1       ) */
 /* >                   ( v1  1    ) */
 /* >                   ( v1 v2  1 ) */
 /* >                   ( v1 v2 v3 ) */
 /* >                   ( v1 v2 v3 ) */
 /* > */
 /* >  where the vi's represent the vectors which define H(i), which are returned */
 /* >  in the matrix A.  The 1's along the diagonal of V are not stored in A. */
 /* > */
 /* >  Let K=MIN(M,N).  The number of blocks is B = ceiling(K/NB), where each */
 /* >  block is of order NB except for the last block, which is of order */
 /* >  IB = K - (B-1)*NB.  For each of the B blocks, a upper triangular block */
 /* >  reflector factor is computed: T1, T2, ..., TB.  The NB-by-NB (and IB-by-IB */
 /* >  for the last block) T's are stored in the NB-by-K matrix T as */
 /* > */
 /* >               T = (T1 T2 ... TB). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgeqrt_(integer *m, integer *n, integer *nb, doublereal *
 	a, integer *lda, doublereal *t, integer *ldt, doublereal *work, 
 	integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3, i__4, i__5;
    /* Local variables */
    integer i__, k, iinfo, ib;
    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *), xerbla_(char *, 
 	    integer *, ftnlen), dgeqrt2_(integer *, integer *, doublereal *, 
 	    integer *, doublereal *, integer *, integer *), dgeqrt3_(integer *
 	    , integer *, doublereal *, integer *, doublereal *, integer *, 
 	    integer *);
 /*  -- LAPACK computational routine (version 3.7.1) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2017 */
 /* ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    --work;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nb < 1 || *nb > f2cmin(*m,*n) && f2cmin(*m,*n) > 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -5;
    } else if (*ldt < *nb) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEQRT", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Quick return if possible */
    k = f2cmin(*m,*n);
    if (k == 0) {
 	return 0;
    }
 /*     Blocked loop of length K */
    i__1 = k;
    i__2 = *nb;
    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
 /* Computing MIN */
 	i__3 = k - i__ + 1;
 	ib = f2cmin(i__3,*nb);
 /*     Compute the QR factorization of the current block A(I:M,I:I+IB-1) */
 	if (TRUE_) {
 	    i__3 = *m - i__ + 1;
 	    dgeqrt3_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &t[i__ * t_dim1 
 		    + 1], ldt, &iinfo);
 	} else {
 	    i__3 = *m - i__ + 1;
 	    dgeqrt2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &t[i__ * t_dim1 
 		    + 1], ldt, &iinfo);
 	}
 	if (i__ + ib <= *n) {
 /*     Update by applying H**T to A(I:M,I+IB:N) from the left */
 	    i__3 = *m - i__ + 1;
 	    i__4 = *n - i__ - ib + 1;
 	    i__5 = *n - i__ - ib + 1;
 	    dlarfb_("L", "T", "F", "C", &i__3, &i__4, &ib, &a[i__ + i__ * 
 		    a_dim1], lda, &t[i__ * t_dim1 + 1], ldt, &a[i__ + (i__ + 
 		    ib) * a_dim1], lda, &work[1], &i__5);
 	}
    }
    return 0;
 /*     End of DGEQRT */
 } /* dgeqrt_ */
--- a/lapack-netlib/SRC/dgeqrt2.c
+++ b/lapack-netlib/SRC/dgeqrt2.c
@@ -0,0 +1,648 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static doublereal c_b5 = 1.;
 static doublereal c_b7 = 0.;
 /* > \brief \b DGEQRT2 computes a QR factorization of a general real or complex matrix using the compact WY re
 presentation of Q. */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEQRT2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqrt2
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqrt2
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqrt2
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGEQRT2( M, N, A, LDA, T, LDT, INFO ) */
 /*       INTEGER   INFO, LDA, LDT, M, N */
 /*       DOUBLE PRECISION   A( LDA, * ), T( LDT, * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEQRT2 computes a QR factorization of a real M-by-N matrix A, */
 /* > using the compact WY representation of Q. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= N. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the real M-by-N matrix A.  On exit, the elements on and */
 /* >          above the diagonal contain the N-by-N upper triangular matrix R; the */
 /* >          elements below the diagonal are the columns of V.  See below for */
 /* >          further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is DOUBLE PRECISION array, dimension (LDT,N) */
 /* >          The N-by-N upper triangular factor of the block reflector. */
 /* >          The elements on and above the diagonal contain the block */
 /* >          reflector T; the elements below the diagonal are not used. */
 /* >          See below for further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix V stores the elementary reflectors H(i) in the i-th column */
 /* >  below the diagonal. For example, if M=5 and N=3, the matrix V is */
 /* > */
 /* >               V = (  1       ) */
 /* >                   ( v1  1    ) */
 /* >                   ( v1 v2  1 ) */
 /* >                   ( v1 v2 v3 ) */
 /* >                   ( v1 v2 v3 ) */
 /* > */
 /* >  where the vi's represent the vectors which define H(i), which are returned */
 /* >  in the matrix A.  The 1's along the diagonal of V are not stored in A.  The */
 /* >  block reflector H is then given by */
 /* > */
 /* >               H = I - V * T * V**T */
 /* > */
 /* >  where V**T is the transpose of V. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgeqrt2_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *t, integer *ldt, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3;
    /* Local variables */
    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *);
    integer i__, k;
    doublereal alpha;
    extern /* Subroutine */ int dgemv_(char *, integer *, integer *, 
 	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
 	    doublereal *, doublereal *, integer *), dtrmv_(char *, 
 	    char *, char *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *), dlarfg_(integer *, doublereal 
 	    *, doublereal *, integer *, doublereal *), xerbla_(char *, 
 	    integer *, ftnlen);
    doublereal aii;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    } else if (*ldt < f2cmax(1,*n)) {
 	*info = -6;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEQRT2", &i__1, (ftnlen)7);
 	return 0;
    }
    k = f2cmin(*m,*n);
    i__1 = k;
    for (i__ = 1; i__ <= i__1; ++i__) {
 /*        Generate elem. refl. H(i) to annihilate A(i+1:m,i), tau(I) -> T(I,1) */
 	i__2 = *m - i__ + 1;
 /* Computing MIN */
 	i__3 = i__ + 1;
 	dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * a_dim1]
 		, &c__1, &t[i__ + t_dim1]);
 	if (i__ < *n) {
 /*           Apply H(i) to A(I:M,I+1:N) from the left */
 	    aii = a[i__ + i__ * a_dim1];
 	    a[i__ + i__ * a_dim1] = 1.;
 /*           W(1:N-I) := A(I:M,I+1:N)^H * A(I:M,I) [W = T(:,N)] */
 	    i__2 = *m - i__ + 1;
 	    i__3 = *n - i__;
 	    dgemv_("T", &i__2, &i__3, &c_b5, &a[i__ + (i__ + 1) * a_dim1], 
 		    lda, &a[i__ + i__ * a_dim1], &c__1, &c_b7, &t[*n * t_dim1 
 		    + 1], &c__1);
 /*           A(I:M,I+1:N) = A(I:m,I+1:N) + alpha*A(I:M,I)*W(1:N-1)^H */
 	    alpha = -t[i__ + t_dim1];
 	    i__2 = *m - i__ + 1;
 	    i__3 = *n - i__;
 	    dger_(&i__2, &i__3, &alpha, &a[i__ + i__ * a_dim1], &c__1, &t[*n *
 		     t_dim1 + 1], &c__1, &a[i__ + (i__ + 1) * a_dim1], lda);
 	    a[i__ + i__ * a_dim1] = aii;
 	}
    }
    i__1 = *n;
    for (i__ = 2; i__ <= i__1; ++i__) {
 	aii = a[i__ + i__ * a_dim1];
 	a[i__ + i__ * a_dim1] = 1.;
 /*        T(1:I-1,I) := alpha * A(I:M,1:I-1)**T * A(I:M,I) */
 	alpha = -t[i__ + t_dim1];
 	i__2 = *m - i__ + 1;
 	i__3 = i__ - 1;
 	dgemv_("T", &i__2, &i__3, &alpha, &a[i__ + a_dim1], lda, &a[i__ + i__ 
 		* a_dim1], &c__1, &c_b7, &t[i__ * t_dim1 + 1], &c__1);
 	a[i__ + i__ * a_dim1] = aii;
 /*        T(1:I-1,I) := T(1:I-1,1:I-1) * T(1:I-1,I) */
 	i__2 = i__ - 1;
 	dtrmv_("U", "N", "N", &i__2, &t[t_offset], ldt, &t[i__ * t_dim1 + 1], 
 		&c__1);
 /*           T(I,I) = tau(I) */
 	t[i__ + i__ * t_dim1] = t[i__ + t_dim1];
 	t[i__ + t_dim1] = 0.;
    }
 /*     End of DGEQRT2 */
    return 0;
 } /* dgeqrt2_ */
--- a/lapack-netlib/SRC/dgeqrt3.c
+++ b/lapack-netlib/SRC/dgeqrt3.c
@@ -0,0 +1,681 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static doublereal c_b8 = 1.;
 static doublereal c_b20 = -1.;
 /* > \brief \b DGEQRT3 recursively computes a QR factorization of a general real or complex matrix using the c
 ompact WY representation of Q. */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGEQRT3 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqrt3
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqrt3
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqrt3
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*        SUBROUTINE DGEQRT3( M, N, A, LDA, T, LDT, INFO ) */
 /*       INTEGER   INFO, LDA, M, N, LDT */
 /*       DOUBLE PRECISION   A( LDA, * ), T( LDT, * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGEQRT3 recursively computes a QR factorization of a real M-by-N */
 /* > matrix A, using the compact WY representation of Q. */
 /* > */
 /* > Based on the algorithm of Elmroth and Gustavson, */
 /* > IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= N. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the real M-by-N matrix A.  On exit, the elements on and */
 /* >          above the diagonal contain the N-by-N upper triangular matrix R; the */
 /* >          elements below the diagonal are the columns of V.  See below for */
 /* >          further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is DOUBLE PRECISION array, dimension (LDT,N) */
 /* >          The N-by-N upper triangular factor of the block reflector. */
 /* >          The elements on and above the diagonal contain the block */
 /* >          reflector T; the elements below the diagonal are not used. */
 /* >          See below for further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date June 2016 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix V stores the elementary reflectors H(i) in the i-th column */
 /* >  below the diagonal. For example, if M=5 and N=3, the matrix V is */
 /* > */
 /* >               V = (  1       ) */
 /* >                   ( v1  1    ) */
 /* >                   ( v1 v2  1 ) */
 /* >                   ( v1 v2 v3 ) */
 /* >                   ( v1 v2 v3 ) */
 /* > */
 /* >  where the vi's represent the vectors which define H(i), which are returned */
 /* >  in the matrix A.  The 1's along the diagonal of V are not stored in A.  The */
 /* >  block reflector H is then given by */
 /* > */
 /* >               H = I - V * T * V**T */
 /* > */
 /* >  where V**T is the transpose of V. */
 /* > */
 /* >  For details of the algorithm, see Elmroth and Gustavson (cited above). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgeqrt3_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *t, integer *ldt, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2;
    /* Local variables */
    integer i__, j;
    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
 	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
 	    integer *, doublereal *, doublereal *, integer *);
    integer iinfo;
    extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, 
 	    integer *, integer *, doublereal *, doublereal *, integer *, 
 	    doublereal *, integer *);
    integer i1, j1, n1, n2;
    extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *,
 	     integer *, doublereal *), xerbla_(char *, integer *, ftnlen);
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2016 */
 /*  ===================================================================== */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    /* Function Body */
    *info = 0;
    if (*n < 0) {
 	*info = -2;
    } else if (*m < *n) {
 	*info = -1;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    } else if (*ldt < f2cmax(1,*n)) {
 	*info = -6;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGEQRT3", &i__1, (ftnlen)7);
 	return 0;
    }
    if (*n == 1) {
 /*        Compute Householder transform when N=1 */
 	dlarfg_(m, &a[a_dim1 + 1], &a[f2cmin(2,*m) + a_dim1], &c__1, &t[t_dim1 + 
 		1]);
    } else {
 /*        Otherwise, split A into blocks... */
 	n1 = *n / 2;
 	n2 = *n - n1;
 /* Computing MIN */
 	i__1 = n1 + 1;
 	j1 = f2cmin(i__1,*n);
 /* Computing MIN */
 	i__1 = *n + 1;
 	i1 = f2cmin(i__1,*m);
 /*        Compute A(1:M,1:N1) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H */
 	dgeqrt3_(m, &n1, &a[a_offset], lda, &t[t_offset], ldt, &iinfo);
 /*        Compute A(1:M,J1:N) = Q1^H A(1:M,J1:N) [workspace: T(1:N1,J1:N)] */
 	i__1 = n2;
 	for (j = 1; j <= i__1; ++j) {
 	    i__2 = n1;
 	    for (i__ = 1; i__ <= i__2; ++i__) {
 		t[i__ + (j + n1) * t_dim1] = a[i__ + (j + n1) * a_dim1];
 	    }
 	}
 	dtrmm_("L", "L", "T", "U", &n1, &n2, &c_b8, &a[a_offset], lda, &t[j1 *
 		 t_dim1 + 1], ldt)
 		;
 	i__1 = *m - n1;
 	dgemm_("T", "N", &n1, &n2, &i__1, &c_b8, &a[j1 + a_dim1], lda, &a[j1 
 		+ j1 * a_dim1], lda, &c_b8, &t[j1 * t_dim1 + 1], ldt);
 	dtrmm_("L", "U", "T", "N", &n1, &n2, &c_b8, &t[t_offset], ldt, &t[j1 *
 		 t_dim1 + 1], ldt)
 		;
 	i__1 = *m - n1;
 	dgemm_("N", "N", &i__1, &n2, &n1, &c_b20, &a[j1 + a_dim1], lda, &t[j1 
 		* t_dim1 + 1], ldt, &c_b8, &a[j1 + j1 * a_dim1], lda);
 	dtrmm_("L", "L", "N", "U", &n1, &n2, &c_b8, &a[a_offset], lda, &t[j1 *
 		 t_dim1 + 1], ldt)
 		;
 	i__1 = n2;
 	for (j = 1; j <= i__1; ++j) {
 	    i__2 = n1;
 	    for (i__ = 1; i__ <= i__2; ++i__) {
 		a[i__ + (j + n1) * a_dim1] -= t[i__ + (j + n1) * t_dim1];
 	    }
 	}
 /*        Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H */
 	i__1 = *m - n1;
 	dgeqrt3_(&i__1, &n2, &a[j1 + j1 * a_dim1], lda, &t[j1 + j1 * t_dim1], 
 		ldt, &iinfo);
 /*        Compute T3 = T(1:N1,J1:N) = -T1 Y1^H Y2 T2 */
 	i__1 = n1;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = n2;
 	    for (j = 1; j <= i__2; ++j) {
 		t[i__ + (j + n1) * t_dim1] = a[j + n1 + i__ * a_dim1];
 	    }
 	}
 	dtrmm_("R", "L", "N", "U", &n1, &n2, &c_b8, &a[j1 + j1 * a_dim1], lda,
 		 &t[j1 * t_dim1 + 1], ldt);
 	i__1 = *m - *n;
 	dgemm_("T", "N", &n1, &n2, &i__1, &c_b8, &a[i1 + a_dim1], lda, &a[i1 
 		+ j1 * a_dim1], lda, &c_b8, &t[j1 * t_dim1 + 1], ldt);
 	dtrmm_("L", "U", "N", "N", &n1, &n2, &c_b20, &t[t_offset], ldt, &t[j1 
 		* t_dim1 + 1], ldt);
 	dtrmm_("R", "U", "N", "N", &n1, &n2, &c_b8, &t[j1 + j1 * t_dim1], ldt,
 		 &t[j1 * t_dim1 + 1], ldt);
 /*        Y = (Y1,Y2); R = [ R1  A(1:N1,J1:N) ];  T = [T1 T3] */
 /*                         [  0        R2     ]       [ 0 T2] */
    }
    return 0;
 /*     End of DGEQRT3 */
 } /* dgeqrt3_ */
--- a/lapack-netlib/SRC/dgerfs.c
+++ b/lapack-netlib/SRC/dgerfs.c
@@ -0,0 +1,882 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static doublereal c_b15 = -1.;
 static doublereal c_b17 = 1.;
 /* > \brief \b DGERFS */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGERFS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgerfs.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgerfs.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgerfs.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, */
 /*                          X, LDX, FERR, BERR, WORK, IWORK, INFO ) */
 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS */
 /*       INTEGER            IPIV( * ), IWORK( * ) */
 /*       DOUBLE PRECISION   A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */
 /*      $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGERFS improves the computed solution to a system of linear */
 /* > equations and provides error bounds and backward error estimates for */
 /* > the solution. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          Specifies the form of the system of equations: */
 /* >          = 'N':  A * X = B     (No transpose) */
 /* >          = 'T':  A**T * X = B  (Transpose) */
 /* >          = 'C':  A**H * X = B  (Conjugate transpose = Transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrices B and X.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          The original N-by-N matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AF */
 /* > \verbatim */
 /* >          AF is DOUBLE PRECISION array, dimension (LDAF,N) */
 /* >          The factors L and U from the factorization A = P*L*U */
 /* >          as computed by DGETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAF */
 /* > \verbatim */
 /* >          LDAF is INTEGER */
 /* >          The leading dimension of the array AF.  LDAF >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices from DGETRF; for 1<=i<=N, row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] B */
 /* > \verbatim */
 /* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
 /* >          The right hand side matrix B. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] X */
 /* > \verbatim */
 /* >          X is DOUBLE PRECISION array, dimension (LDX,NRHS) */
 /* >          On entry, the solution matrix X, as computed by DGETRS. */
 /* >          On exit, the improved solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDX */
 /* > \verbatim */
 /* >          LDX is INTEGER */
 /* >          The leading dimension of the array X.  LDX >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] FERR */
 /* > \verbatim */
 /* >          FERR is DOUBLE PRECISION array, dimension (NRHS) */
 /* >          The estimated forward error bound for each solution vector */
 /* >          X(j) (the j-th column of the solution matrix X). */
 /* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
 /* >          is an estimated upper bound for the magnitude of the largest */
 /* >          element in (X(j) - XTRUE) divided by the magnitude of the */
 /* >          largest element in X(j).  The estimate is as reliable as */
 /* >          the estimate for RCOND, and is almost always a slight */
 /* >          overestimate of the true error. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] BERR */
 /* > \verbatim */
 /* >          BERR is DOUBLE PRECISION array, dimension (NRHS) */
 /* >          The componentwise relative backward error of each solution */
 /* >          vector X(j) (i.e., the smallest relative change in */
 /* >          any element of A or B that makes X(j) an exact solution). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (3*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /* > \par Internal Parameters: */
 /*  ========================= */
 /* > */
 /* > \verbatim */
 /* >  ITMAX is the maximum number of steps of iterative refinement. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgerfs_(char *trans, integer *n, integer *nrhs, 
 	doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *
 	ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, 
 	doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, 
 	integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
 	    x_offset, i__1, i__2, i__3;
    doublereal d__1, d__2, d__3;
    /* Local variables */
    integer kase;
    doublereal safe1, safe2;
    integer i__, j, k;
    doublereal s;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dgemv_(char *, integer *, integer *, 
 	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
 	    doublereal *, doublereal *, integer *);
    integer isave[3];
    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
 	    doublereal *, integer *), daxpy_(integer *, doublereal *, 
 	    doublereal *, integer *, doublereal *, integer *);
    integer count;
    extern /* Subroutine */ int dlacn2_(integer *, doublereal *, doublereal *,
 	     integer *, doublereal *, integer *, integer *);
    extern doublereal dlamch_(char *);
    doublereal xk;
    integer nz;
    doublereal safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dgetrs_(
 	    char *, integer *, integer *, doublereal *, integer *, integer *, 
 	    doublereal *, integer *, integer *);
    logical notran;
    char transt[1];
    doublereal lstres, eps;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters. */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    af_dim1 = *ldaf;
    af_offset = 1 + af_dim1 * 1;
    af -= af_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --iwork;
    /* Function Body */
    *info = 0;
    notran = lsame_(trans, "N");
    if (! notran && ! lsame_(trans, "T") && ! lsame_(
 	    trans, "C")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldaf < f2cmax(1,*n)) {
 	*info = -7;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -10;
    } else if (*ldx < f2cmax(1,*n)) {
 	*info = -12;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGERFS", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Quick return if possible */
    if (*n == 0 || *nrhs == 0) {
 	i__1 = *nrhs;
 	for (j = 1; j <= i__1; ++j) {
 	    ferr[j] = 0.;
 	    berr[j] = 0.;
 /* L10: */
 	}
 	return 0;
    }
    if (notran) {
 	*(unsigned char *)transt = 'T';
    } else {
 	*(unsigned char *)transt = 'N';
    }
 /*     NZ = maximum number of nonzero elements in each row of A, plus 1 */
    nz = *n + 1;
    eps = dlamch_("Epsilon");
    safmin = dlamch_("Safe minimum");
    safe1 = nz * safmin;
    safe2 = safe1 / eps;
 /*     Do for each right hand side */
    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
 	count = 1;
 	lstres = 3.;
 L20:
 /*        Loop until stopping criterion is satisfied. */
 /*        Compute residual R = B - op(A) * X, */
 /*        where op(A) = A, A**T, or A**H, depending on TRANS. */
 	dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1);
 	dgemv_(trans, n, n, &c_b15, &a[a_offset], lda, &x[j * x_dim1 + 1], &
 		c__1, &c_b17, &work[*n + 1], &c__1);
 /*        Compute componentwise relative backward error from formula */
 /*        f2cmax(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */
 /*        where abs(Z) is the componentwise absolute value of the matrix */
 /*        or vector Z.  If the i-th component of the denominator is less */
 /*        than SAFE2, then SAFE1 is added to the i-th components of the */
 /*        numerator and denominator before dividing. */
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1));
 /* L30: */
 	}
 /*        Compute abs(op(A))*abs(X) + abs(B). */
 	if (notran) {
 	    i__2 = *n;
 	    for (k = 1; k <= i__2; ++k) {
 		xk = (d__1 = x[k + j * x_dim1], abs(d__1));
 		i__3 = *n;
 		for (i__ = 1; i__ <= i__3; ++i__) {
 		    work[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * xk;
 /* L40: */
 		}
 /* L50: */
 	    }
 	} else {
 	    i__2 = *n;
 	    for (k = 1; k <= i__2; ++k) {
 		s = 0.;
 		i__3 = *n;
 		for (i__ = 1; i__ <= i__3; ++i__) {
 		    s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = x[
 			    i__ + j * x_dim1], abs(d__2));
 /* L60: */
 		}
 		work[k] += s;
 /* L70: */
 	    }
 	}
 	s = 0.;
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    if (work[i__] > safe2) {
 /* Computing MAX */
 		d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[
 			i__];
 		s = f2cmax(d__2,d__3);
 	    } else {
 /* Computing MAX */
 		d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) 
 			/ (work[i__] + safe1);
 		s = f2cmax(d__2,d__3);
 	    }
 /* L80: */
 	}
 	berr[j] = s;
 /*        Test stopping criterion. Continue iterating if */
 /*           1) The residual BERR(J) is larger than machine epsilon, and */
 /*           2) BERR(J) decreased by at least a factor of 2 during the */
 /*              last iteration, and */
 /*           3) At most ITMAX iterations tried. */
 	if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) {
 /*           Update solution and try again. */
 	    dgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[*n 
 		    + 1], n, info);
 	    daxpy_(n, &c_b17, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1)
 		    ;
 	    lstres = berr[j];
 	    ++count;
 	    goto L20;
 	}
 /*        Bound error from formula */
 /*        norm(X - XTRUE) / norm(X) .le. FERR = */
 /*        norm( abs(inv(op(A)))* */
 /*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */
 /*        where */
 /*          norm(Z) is the magnitude of the largest component of Z */
 /*          inv(op(A)) is the inverse of op(A) */
 /*          abs(Z) is the componentwise absolute value of the matrix or */
 /*             vector Z */
 /*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
 /*          EPS is machine epsilon */
 /*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */
 /*        is incremented by SAFE1 if the i-th component of */
 /*        abs(op(A))*abs(X) + abs(B) is less than SAFE2. */
 /*        Use DLACN2 to estimate the infinity-norm of the matrix */
 /*           inv(op(A)) * diag(W), */
 /*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    if (work[i__] > safe2) {
 		work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * 
 			work[i__];
 	    } else {
 		work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * 
 			work[i__] + safe1;
 	    }
 /* L90: */
 	}
 	kase = 0;
 L100:
 	dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], &
 		kase, isave);
 	if (kase != 0) {
 	    if (kase == 1) {
 /*              Multiply by diag(W)*inv(op(A)**T). */
 		dgetrs_(transt, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &
 			work[*n + 1], n, info);
 		i__2 = *n;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    work[*n + i__] = work[i__] * work[*n + i__];
 /* L110: */
 		}
 	    } else {
 /*              Multiply by inv(op(A))*diag(W). */
 		i__2 = *n;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    work[*n + i__] = work[i__] * work[*n + i__];
 /* L120: */
 		}
 		dgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &
 			work[*n + 1], n, info);
 	    }
 	    goto L100;
 	}
 /*        Normalize error. */
 	lstres = 0.;
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 /* Computing MAX */
 	    d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1));
 	    lstres = f2cmax(d__2,d__3);
 /* L130: */
 	}
 	if (lstres != 0.) {
 	    ferr[j] /= lstres;
 	}
 /* L140: */
    }
    return 0;
 /*     End of DGERFS */
 } /* dgerfs_ */
--- a/lapack-netlib/SRC/dgerfsx.c
+++ b/lapack-netlib/SRC/dgerfsx.c
--- a/lapack-netlib/SRC/dgerq2.c
+++ b/lapack-netlib/SRC/dgerq2.c
@@ -0,0 +1,587 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief \b DGERQ2 computes the RQ factorization of a general rectangular matrix using an unblocked algorit
 hm. */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGERQ2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgerq2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgerq2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgerq2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGERQ2( M, N, A, LDA, TAU, WORK, INFO ) */
 /*       INTEGER            INFO, LDA, M, N */
 /*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGERQ2 computes an RQ factorization of a real m by n matrix A: */
 /* > A = R * Q. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the m by n matrix A. */
 /* >          On exit, if m <= n, the upper triangle of the subarray */
 /* >          A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; */
 /* >          if m >= n, the elements on and above the (m-n)-th subdiagonal */
 /* >          contain the m by n upper trapezoidal matrix R; the remaining */
 /* >          elements, with the array TAU, represent the orthogonal matrix */
 /* >          Q as a product of elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (M) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */
 /* >  A(m-k+i,1:n-k+i-1), and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgerq2_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *tau, doublereal *work, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    /* Local variables */
    integer i__, k;
    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
 	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
 	    doublereal *), dlarfg_(integer *, doublereal *, 
 	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen);
    doublereal aii;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGERQ2", &i__1, (ftnlen)6);
 	return 0;
    }
    k = f2cmin(*m,*n);
    for (i__ = k; i__ >= 1; --i__) {
 /*        Generate elementary reflector H(i) to annihilate */
 /*        A(m-k+i,1:n-k+i-1) */
 	i__1 = *n - k + i__;
 	dlarfg_(&i__1, &a[*m - k + i__ + (*n - k + i__) * a_dim1], &a[*m - k 
 		+ i__ + a_dim1], lda, &tau[i__]);
 /*        Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right */
 	aii = a[*m - k + i__ + (*n - k + i__) * a_dim1];
 	a[*m - k + i__ + (*n - k + i__) * a_dim1] = 1.;
 	i__1 = *m - k + i__ - 1;
 	i__2 = *n - k + i__;
 	dlarf_("Right", &i__1, &i__2, &a[*m - k + i__ + a_dim1], lda, &tau[
 		i__], &a[a_offset], lda, &work[1]);
 	a[*m - k + i__ + (*n - k + i__) * a_dim1] = aii;
 /* L10: */
    }
    return 0;
 /*     End of DGERQ2 */
 } /* dgerq2_ */
--- a/lapack-netlib/SRC/dgerqf.c
+++ b/lapack-netlib/SRC/dgerqf.c
@@ -0,0 +1,710 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;
 /* > \brief \b DGERQF */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGERQF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgerqf.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgerqf.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgerqf.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */
 /*       INTEGER            INFO, LDA, LWORK, M, N */
 /*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGERQF computes an RQ factorization of a real M-by-N matrix A: */
 /* > A = R * Q. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, */
 /* >          if m <= n, the upper triangle of the subarray */
 /* >          A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; */
 /* >          if m >= n, the elements on and above the (m-n)-th subdiagonal */
 /* >          contain the M-by-N upper trapezoidal matrix R; */
 /* >          the remaining elements, with the array TAU, represent the */
 /* >          orthogonal matrix Q as a product of f2cmin(m,n) elementary */
 /* >          reflectors (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK.  LWORK >= f2cmax(1,M). */
 /* >          For optimum performance LWORK >= M*NB, where NB is */
 /* >          the optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */
 /* >  A(m-k+i,1:n-k+i-1), and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int dgerqf_(integer *m, integer *n, doublereal *a, integer *
 	lda, doublereal *tau, doublereal *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    integer i__, k, nbmin, iinfo;
    extern /* Subroutine */ int dgerq2_(integer *, integer *, doublereal *, 
 	    integer *, doublereal *, doublereal *, integer *);
    integer ib, nb, ki, kk;
    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *);
    integer mu, nu, nx;
    extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, 
 	    doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer ldwork, lwkopt;
    logical lquery;
    integer iws;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input arguments */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;
    /* Function Body */
    *info = 0;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info == 0) {
 	k = f2cmin(*m,*n);
 	if (k == 0) {
 	    lwkopt = 1;
 	} else {
 	    nb = ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
 		    (ftnlen)1);
 	    lwkopt = *m * nb;
 	}
 	work[1] = (doublereal) lwkopt;
 	if (*lwork < f2cmax(1,*m) && ! lquery) {
 	    *info = -7;
 	}
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGERQF", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }
 /*     Quick return if possible */
    if (k == 0) {
 	return 0;
    }
    nbmin = 2;
    nx = 1;
    iws = *m;
    if (nb > 1 && nb < k) {
 /*        Determine when to cross over from blocked to unblocked code. */
 /* Computing MAX */
 	i__1 = 0, i__2 = ilaenv_(&c__3, "DGERQF", " ", m, n, &c_n1, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);
 	if (nx < k) {
 /*           Determine if workspace is large enough for blocked code. */
 	    ldwork = *m;
 	    iws = ldwork * nb;
 	    if (*lwork < iws) {
 /*              Not enough workspace to use optimal NB:  reduce NB and */
 /*              determine the minimum value of NB. */
 		nb = *lwork / ldwork;
 /* Computing MAX */
 		i__1 = 2, i__2 = ilaenv_(&c__2, "DGERQF", " ", m, n, &c_n1, &
 			c_n1, (ftnlen)6, (ftnlen)1);
 		nbmin = f2cmax(i__1,i__2);
 	    }
 	}
    }
    if (nb >= nbmin && nb < k && nx < k) {
 /*        Use blocked code initially. */
 /*        The last kk rows are handled by the block method. */
 	ki = (k - nx - 1) / nb * nb;
 /* Computing MIN */
 	i__1 = k, i__2 = ki + nb;
 	kk = f2cmin(i__1,i__2);
 	i__1 = k - kk + 1;
 	i__2 = -nb;
 	for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ 
 		+= i__2) {
 /* Computing MIN */
 	    i__3 = k - i__ + 1;
 	    ib = f2cmin(i__3,nb);
 /*           Compute the RQ factorization of the current block */
 /*           A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) */
 	    i__3 = *n - k + i__ + ib - 1;
 	    dgerq2_(&ib, &i__3, &a[*m - k + i__ + a_dim1], lda, &tau[i__], &
 		    work[1], &iinfo);
 	    if (*m - k + i__ > 1) {
 /*              Form the triangular factor of the block reflector */
 /*              H = H(i+ib-1) . . . H(i+1) H(i) */
 		i__3 = *n - k + i__ + ib - 1;
 		dlarft_("Backward", "Rowwise", &i__3, &ib, &a[*m - k + i__ + 
 			a_dim1], lda, &tau[i__], &work[1], &ldwork);
 /*              Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right */
 		i__3 = *m - k + i__ - 1;
 		i__4 = *n - k + i__ + ib - 1;
 		dlarfb_("Right", "No transpose", "Backward", "Rowwise", &i__3,
 			 &i__4, &ib, &a[*m - k + i__ + a_dim1], lda, &work[1],
 			 &ldwork, &a[a_offset], lda, &work[ib + 1], &ldwork);
 	    }
 /* L10: */
 	}
 	mu = *m - k + i__ + nb - 1;
 	nu = *n - k + i__ + nb - 1;
    } else {
 	mu = *m;
 	nu = *n;
    }
 /*     Use unblocked code to factor the last or only block */
    if (mu > 0 && nu > 0) {
 	dgerq2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
    }
    work[1] = (doublereal) iws;
    return 0;
 /*     End of DGERQF */
 } /* dgerqf_ */
--- a/lapack-netlib/SRC/dgesc2.c
+++ b/lapack-netlib/SRC/dgesc2.c
@@ -0,0 +1,604 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 /* > \brief \b DGESC2 solves a system of linear equations using the LU factorization with complete pivoting co
 mputed by sgetc2. */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGESC2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesc2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesc2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesc2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) */
 /*       INTEGER            LDA, N */
 /*       DOUBLE PRECISION   SCALE */
 /*       INTEGER            IPIV( * ), JPIV( * ) */
 /*       DOUBLE PRECISION   A( LDA, * ), RHS( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGESC2 solves a system of linear equations */
 /* > */
 /* >           A * X = scale* RHS */
 /* > */
 /* > with a general N-by-N matrix A using the LU factorization with */
 /* > complete pivoting computed by DGETC2. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the  LU part of the factorization of the n-by-n */
 /* >          matrix A computed by DGETC2:  A = P * L * U * Q */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1, N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] RHS */
 /* > \verbatim */
 /* >          RHS is DOUBLE PRECISION array, dimension (N). */
 /* >          On entry, the right hand side vector b. */
 /* >          On exit, the solution vector X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N). */
 /* >          The pivot indices; for 1 <= i <= N, row i of the */
 /* >          matrix has been interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JPIV */
 /* > \verbatim */
 /* >          JPIV is INTEGER array, dimension (N). */
 /* >          The pivot indices; for 1 <= j <= N, column j of the */
 /* >          matrix has been interchanged with column JPIV(j). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] SCALE */
 /* > \verbatim */
 /* >          SCALE is DOUBLE PRECISION */
 /* >          On exit, SCALE contains the scale factor. SCALE is chosen */
 /* >          0 <= SCALE <= 1 to prevent overflow in the solution. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date November 2017 */
 /* > \ingroup doubleGEauxiliary */
 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Bo Kagstrom and Peter Poromaa, Department of Computing Science, */
 /* >     Umea University, S-901 87 Umea, Sweden. */
 /*  ===================================================================== */
 /* Subroutine */ int dgesc2_(integer *n, doublereal *a, integer *lda, 
 	doublereal *rhs, integer *ipiv, integer *jpiv, doublereal *scale)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    doublereal d__1, d__2;
    /* Local variables */
    doublereal temp;
    integer i__, j;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
 	    integer *), dlabad_(doublereal *, doublereal *);
    extern doublereal dlamch_(char *);
    extern integer idamax_(integer *, doublereal *, integer *);
    doublereal bignum;
    extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, 
 	    integer *, integer *, integer *, integer *);
    doublereal smlnum, eps;
 /*  -- LAPACK auxiliary routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */
 /*  ===================================================================== */
 /*      Set constant to control overflow */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --rhs;
    --ipiv;
    --jpiv;
    /* Function Body */
    eps = dlamch_("P");
    smlnum = dlamch_("S") / eps;
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);
 /*     Apply permutations IPIV to RHS */
    i__1 = *n - 1;
    dlaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &ipiv[1], &c__1);
 /*     Solve for L part */
    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	i__2 = *n;
 	for (j = i__ + 1; j <= i__2; ++j) {
 	    rhs[j] -= a[j + i__ * a_dim1] * rhs[i__];
 /* L10: */
 	}
 /* L20: */
    }
 /*     Solve for U part */
    *scale = 1.;
 /*     Check for scaling */
    i__ = idamax_(n, &rhs[1], &c__1);
    if (smlnum * 2. * (d__1 = rhs[i__], abs(d__1)) > (d__2 = a[*n + *n * 
 	    a_dim1], abs(d__2))) {
 	temp = .5 / (d__1 = rhs[i__], abs(d__1));
 	dscal_(n, &temp, &rhs[1], &c__1);
 	*scale *= temp;
    }
    for (i__ = *n; i__ >= 1; --i__) {
 	temp = 1. / a[i__ + i__ * a_dim1];
 	rhs[i__] *= temp;
 	i__1 = *n;
 	for (j = i__ + 1; j <= i__1; ++j) {
 	    rhs[i__] -= rhs[j] * (a[i__ + j * a_dim1] * temp);
 /* L30: */
 	}
 /* L40: */
    }
 /*     Apply permutations JPIV to the solution (RHS) */
    i__1 = *n - 1;
    dlaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &jpiv[1], &c_n1);
    return 0;
 /*     End of DGESC2 */
 } /* dgesc2_ */
--- a/lapack-netlib/SRC/dgesdd.c
+++ b/lapack-netlib/SRC/dgesdd.c
--- a/lapack-netlib/SRC/dgesv.c
+++ b/lapack-netlib/SRC/dgesv.c
@@ -0,0 +1,574 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* > \brief <b> DGESV computes the solution to system of linear equations A * X = B for GE matrices</b> */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGESV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesv.f
 "> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesv.f
 "> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesv.f
 "> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */
 /*       INTEGER            INFO, LDA, LDB, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGESV computes the solution to a real system of linear equations */
 /* >    A * X = B, */
 /* > where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */
 /* > */
 /* > The LU decomposition with partial pivoting and row interchanges is */
 /* > used to factor A as */
 /* >    A = P * L * U, */
 /* > where P is a permutation matrix, L is unit lower triangular, and U is */
 /* > upper triangular.  The factored form of A is then used to solve the */
 /* > system of equations A * X = B. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of linear equations, i.e., the order of the */
 /* >          matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the N-by-N coefficient matrix A. */
 /* >          On exit, the factors L and U from the factorization */
 /* >          A = P*L*U; the unit diagonal elements of L are not stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices that define the permutation matrix P; */
 /* >          row i of the matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
 /* >          On entry, the N-by-NRHS matrix of right hand side matrix B. */
 /* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization */
 /* >                has been completed, but the factor U is exactly */
 /* >                singular, so the solution could not be computed. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEsolve */
 /*  ===================================================================== */
 /* Subroutine */ int dgesv_(integer *n, integer *nrhs, doublereal *a, integer 
 	*lda, integer *ipiv, doublereal *b, integer *ldb, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
    /* Local variables */
    extern /* Subroutine */ int dgetrf_(integer *, integer *, doublereal *, 
 	    integer *, integer *, integer *), xerbla_(char *, integer *, ftnlen), dgetrs_(char *, integer *, integer *, doublereal *, 
 	    integer *, integer *, doublereal *, integer *, integer *);
 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters. */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    /* Function Body */
    *info = 0;
    if (*n < 0) {
 	*info = -1;
    } else if (*nrhs < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGESV ", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Compute the LU factorization of A. */
    dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info);
    if (*info == 0) {
 /*        Solve the system A*X = B, overwriting B with X. */
 	dgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[
 		b_offset], ldb, info);
    }
    return 0;
 /*     End of DGESV */
 } /* dgesv_ */
--- a/lapack-netlib/SRC/dgesvd.c
+++ b/lapack-netlib/SRC/dgesvd.c
--- a/lapack-netlib/SRC/dgesvdq.c
+++ b/lapack-netlib/SRC/dgesvdq.c
--- a/lapack-netlib/SRC/dgesvdx.c
+++ b/lapack-netlib/SRC/dgesvdx.c
--- a/lapack-netlib/SRC/dgesvj.c
+++ b/lapack-netlib/SRC/dgesvj.c
--- a/lapack-netlib/SRC/dgesvx.c
+++ b/lapack-netlib/SRC/dgesvx.c
--- a/lapack-netlib/SRC/dgesvxx.c
+++ b/lapack-netlib/SRC/dgesvxx.c
--- a/lapack-netlib/SRC/dgetc2.c
+++ b/lapack-netlib/SRC/dgetc2.c
@@ -0,0 +1,646 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static doublereal c_b10 = -1.;
 /* > \brief \b DGETC2 computes the LU factorization with complete pivoting of the general n-by-n matrix. */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGETC2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetc2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetc2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetc2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGETC2( N, A, LDA, IPIV, JPIV, INFO ) */
 /*       INTEGER            INFO, LDA, N */
 /*       INTEGER            IPIV( * ), JPIV( * ) */
 /*       DOUBLE PRECISION   A( LDA, * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGETC2 computes an LU factorization with complete pivoting of the */
 /* > n-by-n matrix A. The factorization has the form A = P * L * U * Q, */
 /* > where P and Q are permutation matrices, L is lower triangular with */
 /* > unit diagonal elements and U is upper triangular. */
 /* > */
 /* > This is the Level 2 BLAS algorithm. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA, N) */
 /* >          On entry, the n-by-n matrix A to be factored. */
 /* >          On exit, the factors L and U from the factorization */
 /* >          A = P*L*U*Q; the unit diagonal elements of L are not stored. */
 /* >          If U(k, k) appears to be less than SMIN, U(k, k) is given the */
 /* >          value of SMIN, i.e., giving a nonsingular perturbed system. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension(N). */
 /* >          The pivot indices; for 1 <= i <= N, row i of the */
 /* >          matrix has been interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] JPIV */
 /* > \verbatim */
 /* >          JPIV is INTEGER array, dimension(N). */
 /* >          The pivot indices; for 1 <= j <= N, column j of the */
 /* >          matrix has been interchanged with column JPIV(j). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >           = 0: successful exit */
 /* >           > 0: if INFO = k, U(k, k) is likely to produce overflow if */
 /* >                we try to solve for x in Ax = b. So U is perturbed to */
 /* >                avoid the overflow. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date June 2016 */
 /* > \ingroup doubleGEauxiliary */
 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Bo Kagstrom and Peter Poromaa, Department of Computing Science, */
 /* >     Umea University, S-901 87 Umea, Sweden. */
 /*  ===================================================================== */
 /* Subroutine */ int dgetc2_(integer *n, doublereal *a, integer *lda, integer 
 	*ipiv, integer *jpiv, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    doublereal d__1;
    /* Local variables */
    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *);
    doublereal smin, xmax;
    integer i__, j;
    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, 
 	    doublereal *, integer *), dlabad_(doublereal *, doublereal *);
    extern doublereal dlamch_(char *);
    integer ip, jp;
    doublereal bignum, smlnum, eps;
    integer ipv, jpv;
 /*  -- LAPACK auxiliary routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2016 */
 /*  ===================================================================== */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    --jpiv;
    /* Function Body */
    *info = 0;
 /*     Quick return if possible */
    if (*n == 0) {
 	return 0;
    }
 /*     Set constants to control overflow */
    eps = dlamch_("P");
    smlnum = dlamch_("S") / eps;
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);
 /*     Handle the case N=1 by itself */
    if (*n == 1) {
 	ipiv[1] = 1;
 	jpiv[1] = 1;
 	if ((d__1 = a[a_dim1 + 1], abs(d__1)) < smlnum) {
 	    *info = 1;
 	    a[a_dim1 + 1] = smlnum;
 	}
 	return 0;
    }
 /*     Factorize A using complete pivoting. */
 /*     Set pivots less than SMIN to SMIN. */
    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
 /*        Find f2cmax element in matrix A */
 	xmax = 0.;
 	i__2 = *n;
 	for (ip = i__; ip <= i__2; ++ip) {
 	    i__3 = *n;
 	    for (jp = i__; jp <= i__3; ++jp) {
 		if ((d__1 = a[ip + jp * a_dim1], abs(d__1)) >= xmax) {
 		    xmax = (d__1 = a[ip + jp * a_dim1], abs(d__1));
 		    ipv = ip;
 		    jpv = jp;
 		}
 /* L10: */
 	    }
 /* L20: */
 	}
 	if (i__ == 1) {
 /* Computing MAX */
 	    d__1 = eps * xmax;
 	    smin = f2cmax(d__1,smlnum);
 	}
 /*        Swap rows */
 	if (ipv != i__) {
 	    dswap_(n, &a[ipv + a_dim1], lda, &a[i__ + a_dim1], lda);
 	}
 	ipiv[i__] = ipv;
 /*        Swap columns */
 	if (jpv != i__) {
 	    dswap_(n, &a[jpv * a_dim1 + 1], &c__1, &a[i__ * a_dim1 + 1], &
 		    c__1);
 	}
 	jpiv[i__] = jpv;
 /*        Check for singularity */
 	if ((d__1 = a[i__ + i__ * a_dim1], abs(d__1)) < smin) {
 	    *info = i__;
 	    a[i__ + i__ * a_dim1] = smin;
 	}
 	i__2 = *n;
 	for (j = i__ + 1; j <= i__2; ++j) {
 	    a[j + i__ * a_dim1] /= a[i__ + i__ * a_dim1];
 /* L30: */
 	}
 	i__2 = *n - i__;
 	i__3 = *n - i__;
 	dger_(&i__2, &i__3, &c_b10, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[i__ 
 		+ (i__ + 1) * a_dim1], lda, &a[i__ + 1 + (i__ + 1) * a_dim1], 
 		lda);
 /* L40: */
    }
    if ((d__1 = a[*n + *n * a_dim1], abs(d__1)) < smin) {
 	*info = *n;
 	a[*n + *n * a_dim1] = smin;
    }
 /*     Set last pivots to N */
    ipiv[*n] = *n;
    jpiv[*n] = *n;
    return 0;
 /*     End of DGETC2 */
 } /* dgetc2_ */
--- a/lapack-netlib/SRC/dgetf2.c
+++ b/lapack-netlib/SRC/dgetf2.c
@@ -0,0 +1,620 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static doublereal c_b8 = -1.;
 /* > \brief \b DGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row
 interchanges (unblocked algorithm). */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGETF2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetf2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetf2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetf2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO ) */
 /*       INTEGER            INFO, LDA, M, N */
 /*       INTEGER            IPIV( * ) */
 /*       DOUBLE PRECISION   A( LDA, * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGETF2 computes an LU factorization of a general m-by-n matrix A */
 /* > using partial pivoting with row interchanges. */
 /* > */
 /* > The factorization has the form */
 /* >    A = P * L * U */
 /* > where P is a permutation matrix, L is lower triangular with unit */
 /* > diagonal elements (lower trapezoidal if m > n), and U is upper */
 /* > triangular (upper trapezoidal if m < n). */
 /* > */
 /* > This is the right-looking Level 2 BLAS version of the algorithm. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the m by n matrix to be factored. */
 /* >          On exit, the factors L and U from the factorization */
 /* >          A = P*L*U; the unit diagonal elements of L are not stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (f2cmin(M,N)) */
 /* >          The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -k, the k-th argument had an illegal value */
 /* >          > 0: if INFO = k, U(k,k) is exactly zero. The factorization */
 /* >               has been completed, but the factor U is exactly */
 /* >               singular, and division by zero will occur if it is used */
 /* >               to solve a system of equations. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgetf2_(integer *m, integer *n, doublereal *a, integer *
 	lda, integer *ipiv, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    doublereal d__1;
    /* Local variables */
    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, 
 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
 	    integer *);
    integer i__, j;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
 	    integer *);
    doublereal sfmin;
    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, 
 	    doublereal *, integer *);
    extern doublereal dlamch_(char *);
    integer jp;
    extern integer idamax_(integer *, doublereal *, integer *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters. */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGETF2", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Quick return if possible */
    if (*m == 0 || *n == 0) {
 	return 0;
    }
 /*     Compute machine safe minimum */
    sfmin = dlamch_("S");
    i__1 = f2cmin(*m,*n);
    for (j = 1; j <= i__1; ++j) {
 /*        Find pivot and test for singularity. */
 	i__2 = *m - j + 1;
 	jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1);
 	ipiv[j] = jp;
 	if (a[jp + j * a_dim1] != 0.) {
 /*           Apply the interchange to columns 1:N. */
 	    if (jp != j) {
 		dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda);
 	    }
 /*           Compute elements J+1:M of J-th column. */
 	    if (j < *m) {
 		if ((d__1 = a[j + j * a_dim1], abs(d__1)) >= sfmin) {
 		    i__2 = *m - j;
 		    d__1 = 1. / a[j + j * a_dim1];
 		    dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1);
 		} else {
 		    i__2 = *m - j;
 		    for (i__ = 1; i__ <= i__2; ++i__) {
 			a[j + i__ + j * a_dim1] /= a[j + j * a_dim1];
 /* L20: */
 		    }
 		}
 	    }
 	} else if (*info == 0) {
 	    *info = j;
 	}
 	if (j < f2cmin(*m,*n)) {
 /*           Update trailing submatrix. */
 	    i__2 = *m - j;
 	    i__3 = *n - j;
 	    dger_(&i__2, &i__3, &c_b8, &a[j + 1 + j * a_dim1], &c__1, &a[j + (
 		    j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda);
 	}
 /* L10: */
    }
    return 0;
 /*     End of DGETF2 */
 } /* dgetf2_ */
--- a/lapack-netlib/SRC/dgetrf.c
+++ b/lapack-netlib/SRC/dgetrf.c
@@ -0,0 +1,645 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static doublereal c_b16 = 1.;
 static doublereal c_b19 = -1.;
 /* > \brief \b DGETRF */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGETRF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrf.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrf.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrf.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO ) */
 /*       INTEGER            INFO, LDA, M, N */
 /*       INTEGER            IPIV( * ) */
 /*       DOUBLE PRECISION   A( LDA, * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGETRF computes an LU factorization of a general M-by-N matrix A */
 /* > using partial pivoting with row interchanges. */
 /* > */
 /* > The factorization has the form */
 /* >    A = P * L * U */
 /* > where P is a permutation matrix, L is lower triangular with unit */
 /* > diagonal elements (lower trapezoidal if m > n), and U is upper */
 /* > triangular (upper trapezoidal if m < n). */
 /* > */
 /* > This is the right-looking Level 3 BLAS version of the algorithm. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix to be factored. */
 /* >          On exit, the factors L and U from the factorization */
 /* >          A = P*L*U; the unit diagonal elements of L are not stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (f2cmin(M,N)) */
 /* >          The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization */
 /* >                has been completed, but the factor U is exactly */
 /* >                singular, and division by zero will occur if it is used */
 /* >                to solve a system of equations. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgetrf_(integer *m, integer *n, doublereal *a, integer *
 	lda, integer *ipiv, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    /* Local variables */
    integer i__, j;
    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
 	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
 	    integer *, doublereal *, doublereal *, integer *);
    integer iinfo;
    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, 
 	    integer *, integer *, doublereal *, doublereal *, integer *, 
 	    doublereal *, integer *);
    integer jb, nb;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, 
 	    integer *, integer *, integer *, integer *), dgetrf2_(integer *, 
 	    integer *, doublereal *, integer *, integer *, integer *);
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters. */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGETRF", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Quick return if possible */
    if (*m == 0 || *n == 0) {
 	return 0;
    }
 /*     Determine the block size for this environment. */
    nb = ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
 	    1);
    if (nb <= 1 || nb >= f2cmin(*m,*n)) {
 /*        Use unblocked code. */
 	dgetrf2_(m, n, &a[a_offset], lda, &ipiv[1], info);
    } else {
 /*        Use blocked code. */
 	i__1 = f2cmin(*m,*n);
 	i__2 = nb;
 	for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
 /* Computing MIN */
 	    i__3 = f2cmin(*m,*n) - j + 1;
 	    jb = f2cmin(i__3,nb);
 /*           Factor diagonal and subdiagonal blocks and test for exact */
 /*           singularity. */
 	    i__3 = *m - j + 1;
 	    dgetrf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo);
 /*           Adjust INFO and the pivot indices. */
 	    if (*info == 0 && iinfo > 0) {
 		*info = iinfo + j - 1;
 	    }
 /* Computing MIN */
 	    i__4 = *m, i__5 = j + jb - 1;
 	    i__3 = f2cmin(i__4,i__5);
 	    for (i__ = j; i__ <= i__3; ++i__) {
 		ipiv[i__] = j - 1 + ipiv[i__];
 /* L10: */
 	    }
 /*           Apply interchanges to columns 1:J-1. */
 	    i__3 = j - 1;
 	    i__4 = j + jb - 1;
 	    dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1);
 	    if (j + jb <= *n) {
 /*              Apply interchanges to columns J+JB:N. */
 		i__3 = *n - j - jb + 1;
 		i__4 = j + jb - 1;
 		dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, &
 			ipiv[1], &c__1);
 /*              Compute block row of U. */
 		i__3 = *n - j - jb + 1;
 		dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, &
 			c_b16, &a[j + j * a_dim1], lda, &a[j + (j + jb) * 
 			a_dim1], lda);
 		if (j + jb <= *m) {
 /*                 Update trailing submatrix. */
 		    i__3 = *m - j - jb + 1;
 		    i__4 = *n - j - jb + 1;
 		    dgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, 
 			    &c_b19, &a[j + jb + j * a_dim1], lda, &a[j + (j + 
 			    jb) * a_dim1], lda, &c_b16, &a[j + jb + (j + jb) *
 			     a_dim1], lda);
 		}
 	    }
 /* L20: */
 	}
    }
    return 0;
 /*     End of DGETRF */
 } /* dgetrf_ */
--- a/lapack-netlib/SRC/dgetrf2.c
+++ b/lapack-netlib/SRC/dgetrf2.c
@@ -0,0 +1,683 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static doublereal c_b13 = 1.;
 static doublereal c_b16 = -1.;
 /* > \brief \b DGETRF2 */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /*  Definition: */
 /*  =========== */
 /*        SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO ) */
 /*       INTEGER            INFO, LDA, M, N */
 /*       INTEGER            IPIV( * ) */
 /*       DOUBLE PRECISION   A( LDA, * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGETRF2 computes an LU factorization of a general M-by-N matrix A */
 /* > using partial pivoting with row interchanges. */
 /* > */
 /* > The factorization has the form */
 /* >    A = P * L * U */
 /* > where P is a permutation matrix, L is lower triangular with unit */
 /* > diagonal elements (lower trapezoidal if m > n), and U is upper */
 /* > triangular (upper trapezoidal if m < n). */
 /* > */
 /* > This is the recursive version of the algorithm. It divides */
 /* > the matrix into four submatrices: */
 /* > */
 /* >        [  A11 | A12  ]  where A11 is n1 by n1 and A22 is n2 by n2 */
 /* >    A = [ -----|----- ]  with n1 = f2cmin(m,n)/2 */
 /* >        [  A21 | A22  ]       n2 = n-n1 */
 /* > */
 /* >                                       [ A11 ] */
 /* > The subroutine calls itself to factor [ --- ], */
 /* >                                       [ A12 ] */
 /* >                 [ A12 ] */
 /* > do the swaps on [ --- ], solve A12, update A22, */
 /* >                 [ A22 ] */
 /* > */
 /* > then calls itself to factor A22 and do the swaps on A21. */
 /* > */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix to be factored. */
 /* >          On exit, the factors L and U from the factorization */
 /* >          A = P*L*U; the unit diagonal elements of L are not stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (f2cmin(M,N)) */
 /* >          The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization */
 /* >                has been completed, but the factor U is exactly */
 /* >                singular, and division by zero will occur if it is used */
 /* >                to solve a system of equations. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date June 2016 */
 /* > \ingroup doubleGEcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgetrf2_(integer *m, integer *n, doublereal *a, integer *
 	lda, integer *ipiv, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    doublereal d__1;
    /* Local variables */
    doublereal temp;
    integer i__;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
 	    integer *), dgemm_(char *, char *, integer *, integer *, integer *
 	    , doublereal *, doublereal *, integer *, doublereal *, integer *, 
 	    doublereal *, doublereal *, integer *);
    integer iinfo;
    doublereal sfmin;
    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, 
 	    integer *, integer *, doublereal *, doublereal *, integer *, 
 	    doublereal *, integer *);
    integer n1, n2;
    extern doublereal dlamch_(char *);
    extern integer idamax_(integer *, doublereal *, integer *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dlaswp_(
 	    integer *, doublereal *, integer *, integer *, integer *, integer 
 	    *, integer *);
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGETRF2", &i__1, (ftnlen)7);
 	return 0;
    }
 /*     Quick return if possible */
    if (*m == 0 || *n == 0) {
 	return 0;
    }
    if (*m == 1) {
 /*        Use unblocked code for one row case */
 /*        Just need to handle IPIV and INFO */
 	ipiv[1] = 1;
 	if (a[a_dim1 + 1] == 0.) {
 	    *info = 1;
 	}
    } else if (*n == 1) {
 /*        Use unblocked code for one column case */
 /*        Compute machine safe minimum */
 	sfmin = dlamch_("S");
 /*        Find pivot and test for singularity */
 	i__ = idamax_(m, &a[a_dim1 + 1], &c__1);
 	ipiv[1] = i__;
 	if (a[i__ + a_dim1] != 0.) {
 /*           Apply the interchange */
 	    if (i__ != 1) {
 		temp = a[a_dim1 + 1];
 		a[a_dim1 + 1] = a[i__ + a_dim1];
 		a[i__ + a_dim1] = temp;
 	    }
 /*           Compute elements 2:M of the column */
 	    if ((d__1 = a[a_dim1 + 1], abs(d__1)) >= sfmin) {
 		i__1 = *m - 1;
 		d__1 = 1. / a[a_dim1 + 1];
 		dscal_(&i__1, &d__1, &a[a_dim1 + 2], &c__1);
 	    } else {
 		i__1 = *m - 1;
 		for (i__ = 1; i__ <= i__1; ++i__) {
 		    a[i__ + 1 + a_dim1] /= a[a_dim1 + 1];
 /* L10: */
 		}
 	    }
 	} else {
 	    *info = 1;
 	}
    } else {
 /*        Use recursive code */
 	n1 = f2cmin(*m,*n) / 2;
 	n2 = *n - n1;
 /*               [ A11 ] */
 /*        Factor [ --- ] */
 /*               [ A21 ] */
 	dgetrf2_(m, &n1, &a[a_offset], lda, &ipiv[1], &iinfo);
 	if (*info == 0 && iinfo > 0) {
 	    *info = iinfo;
 	}
 /*                              [ A12 ] */
 /*        Apply interchanges to [ --- ] */
 /*                              [ A22 ] */
 	dlaswp_(&n2, &a[(n1 + 1) * a_dim1 + 1], lda, &c__1, &n1, &ipiv[1], &
 		c__1);
 /*        Solve A12 */
 	dtrsm_("L", "L", "N", "U", &n1, &n2, &c_b13, &a[a_offset], lda, &a[(
 		n1 + 1) * a_dim1 + 1], lda);
 /*        Update A22 */
 	i__1 = *m - n1;
 	dgemm_("N", "N", &i__1, &n2, &n1, &c_b16, &a[n1 + 1 + a_dim1], lda, &
 		a[(n1 + 1) * a_dim1 + 1], lda, &c_b13, &a[n1 + 1 + (n1 + 1) * 
 		a_dim1], lda);
 /*        Factor A22 */
 	i__1 = *m - n1;
 	dgetrf2_(&i__1, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &ipiv[n1 + 
 		1], &iinfo);
 /*        Adjust INFO and the pivot indices */
 	if (*info == 0 && iinfo > 0) {
 	    *info = iinfo + n1;
 	}
 	i__1 = f2cmin(*m,*n);
 	for (i__ = n1 + 1; i__ <= i__1; ++i__) {
 	    ipiv[i__] += n1;
 /* L20: */
 	}
 /*        Apply interchanges to A21 */
 	i__1 = n1 + 1;
 	i__2 = f2cmin(*m,*n);
 	dlaswp_(&n1, &a[a_dim1 + 1], lda, &i__1, &i__2, &ipiv[1], &c__1);
    }
    return 0;
 /*     End of DGETRF2 */
 } /* dgetrf2_ */
--- a/lapack-netlib/SRC/dgetri.c
+++ b/lapack-netlib/SRC/dgetri.c
@@ -0,0 +1,694 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__2 = 2;
 static doublereal c_b20 = -1.;
 static doublereal c_b22 = 1.;
 /* > \brief \b DGETRI */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGETRI + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetri.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetri.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetri.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) */
 /*       INTEGER            INFO, LDA, LWORK, N */
 /*       INTEGER            IPIV( * ) */
 /*       DOUBLE PRECISION   A( LDA, * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGETRI computes the inverse of a matrix using the LU factorization */
 /* > computed by DGETRF. */
 /* > */
 /* > This method inverts U and then computes inv(A) by solving the system */
 /* > inv(A)*L = inv(U) for inv(A). */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the factors L and U from the factorization */
 /* >          A = P*L*U as computed by DGETRF. */
 /* >          On exit, if INFO = 0, the inverse of the original matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices from DGETRF; for 1<=i<=N, row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO=0, then WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK.  LWORK >= f2cmax(1,N). */
 /* >          For optimal performance LWORK >= N*NB, where NB is */
 /* >          the optimal blocksize returned by ILAENV. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, U(i,i) is exactly zero; the matrix is */
 /* >                singular and its inverse could not be computed. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgetri_(integer *n, doublereal *a, integer *lda, integer 
 	*ipiv, doublereal *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    /* Local variables */
    integer i__, j;
    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
 	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
 	    integer *, doublereal *, doublereal *, integer *),
 	     dgemv_(char *, integer *, integer *, doublereal *, doublereal *, 
 	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
 	    integer *);
    integer nbmin;
    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, 
 	    doublereal *, integer *), dtrsm_(char *, char *, char *, char *, 
 	    integer *, integer *, doublereal *, doublereal *, integer *, 
 	    doublereal *, integer *);
    integer jb, nb, jj, jp, nn;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer ldwork;
    extern /* Subroutine */ int dtrtri_(char *, char *, integer *, doublereal 
 	    *, integer *, integer *);
    integer lwkopt;
    logical lquery;
    integer iws;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters. */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    --work;
    /* Function Body */
    *info = 0;
    nb = ilaenv_(&c__1, "DGETRI", " ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);
    lwkopt = *n * nb;
    work[1] = (doublereal) lwkopt;
    lquery = *lwork == -1;
    if (*n < 0) {
 	*info = -1;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -3;
    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
 	*info = -6;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGETRI", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }
 /*     Quick return if possible */
    if (*n == 0) {
 	return 0;
    }
 /*     Form inv(U).  If INFO > 0 from DTRTRI, then U is singular, */
 /*     and the inverse is not computed. */
    dtrtri_("Upper", "Non-unit", n, &a[a_offset], lda, info);
    if (*info > 0) {
 	return 0;
    }
    nbmin = 2;
    ldwork = *n;
    if (nb > 1 && nb < *n) {
 /* Computing MAX */
 	i__1 = ldwork * nb;
 	iws = f2cmax(i__1,1);
 	if (*lwork < iws) {
 	    nb = *lwork / ldwork;
 /* Computing MAX */
 	    i__1 = 2, i__2 = ilaenv_(&c__2, "DGETRI", " ", n, &c_n1, &c_n1, &
 		    c_n1, (ftnlen)6, (ftnlen)1);
 	    nbmin = f2cmax(i__1,i__2);
 	}
    } else {
 	iws = *n;
    }
 /*     Solve the equation inv(A)*L = inv(U) for inv(A). */
    if (nb < nbmin || nb >= *n) {
 /*        Use unblocked code. */
 	for (j = *n; j >= 1; --j) {
 /*           Copy current column of L to WORK and replace with zeros. */
 	    i__1 = *n;
 	    for (i__ = j + 1; i__ <= i__1; ++i__) {
 		work[i__] = a[i__ + j * a_dim1];
 		a[i__ + j * a_dim1] = 0.;
 /* L10: */
 	    }
 /*           Compute current column of inv(A). */
 	    if (j < *n) {
 		i__1 = *n - j;
 		dgemv_("No transpose", n, &i__1, &c_b20, &a[(j + 1) * a_dim1 
 			+ 1], lda, &work[j + 1], &c__1, &c_b22, &a[j * a_dim1 
 			+ 1], &c__1);
 	    }
 /* L20: */
 	}
    } else {
 /*        Use blocked code. */
 	nn = (*n - 1) / nb * nb + 1;
 	i__1 = -nb;
 	for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) {
 /* Computing MIN */
 	    i__2 = nb, i__3 = *n - j + 1;
 	    jb = f2cmin(i__2,i__3);
 /*           Copy current block column of L to WORK and replace with */
 /*           zeros. */
 	    i__2 = j + jb - 1;
 	    for (jj = j; jj <= i__2; ++jj) {
 		i__3 = *n;
 		for (i__ = jj + 1; i__ <= i__3; ++i__) {
 		    work[i__ + (jj - j) * ldwork] = a[i__ + jj * a_dim1];
 		    a[i__ + jj * a_dim1] = 0.;
 /* L30: */
 		}
 /* L40: */
 	    }
 /*           Compute current block column of inv(A). */
 	    if (j + jb <= *n) {
 		i__2 = *n - j - jb + 1;
 		dgemm_("No transpose", "No transpose", n, &jb, &i__2, &c_b20, 
 			&a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], &
 			ldwork, &c_b22, &a[j * a_dim1 + 1], lda);
 	    }
 	    dtrsm_("Right", "Lower", "No transpose", "Unit", n, &jb, &c_b22, &
 		    work[j], &ldwork, &a[j * a_dim1 + 1], lda);
 /* L50: */
 	}
    }
 /*     Apply column interchanges. */
    for (j = *n - 1; j >= 1; --j) {
 	jp = ipiv[j];
 	if (jp != j) {
 	    dswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1);
 	}
 /* L60: */
    }
    work[1] = (doublereal) iws;
    return 0;
 /*     End of DGETRI */
 } /* dgetri_ */
--- a/lapack-netlib/SRC/dgetrs.c
+++ b/lapack-netlib/SRC/dgetrs.c
@@ -0,0 +1,620 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c__1 = 1;
 static doublereal c_b12 = 1.;
 static integer c_n1 = -1;
 /* > \brief \b DGETRS */
 /*  =========== DOCUMENTATION =========== */
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 /* > \htmlonly */
 /* > Download DGETRS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrs.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrs.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrs.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */
 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, LDA, LDB, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGETRS solves a system of linear equations */
 /* >    A * X = B  or  A**T * X = B */
 /* > with a general N-by-N matrix A using the LU factorization computed */
 /* > by DGETRF. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          Specifies the form of the system of equations: */
 /* >          = 'N':  A * X = B  (No transpose) */
 /* >          = 'T':  A**T* X = B  (Transpose) */
 /* >          = 'C':  A**T* X = B  (Conjugate transpose = Transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          The factors L and U from the factorization A = P*L*U */
 /* >          as computed by DGETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices from DGETRF; for 1<=i<=N, row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
 /* >          On entry, the right hand side matrix B. */
 /* >          On exit, the solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date December 2016 */
 /* > \ingroup doubleGEcomputational */
 /*  ===================================================================== */
 /* Subroutine */ int dgetrs_(char *trans, integer *n, integer *nrhs, 
 	doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *
 	ldb, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
    /* Local variables */
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, 
 	    integer *, integer *, doublereal *, doublereal *, integer *, 
 	    doublereal *, integer *), xerbla_(
 	    char *, integer *, ftnlen), dlaswp_(integer *, doublereal *, 
 	    integer *, integer *, integer *, integer *, integer *);
    logical notran;
 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */
 /*  ===================================================================== */
 /*     Test the input parameters. */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    /* Function Body */
    *info = 0;
    notran = lsame_(trans, "N");
    if (! notran && ! lsame_(trans, "T") && ! lsame_(
 	    trans, "C")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGETRS", &i__1, (ftnlen)6);
 	return 0;
    }
 /*     Quick return if possible */
    if (*n == 0 || *nrhs == 0) {
 	return 0;
    }
    if (notran) {
 /*        Solve A * X = B. */
 /*        Apply row interchanges to the right hand sides. */
 	dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1);
 /*        Solve L*X = B, overwriting B with X. */
 	dtrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b12, &a[
 		a_offset], lda, &b[b_offset], ldb);
 /*        Solve U*X = B, overwriting B with X. */
 	dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b12, &
 		a[a_offset], lda, &b[b_offset], ldb);
    } else {
 /*        Solve A**T * X = B. */
 /*        Solve U**T *X = B, overwriting B with X. */
 	dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b12, &a[
 		a_offset], lda, &b[b_offset], ldb);
 /*        Solve L**T *X = B, overwriting B with X. */
 	dtrsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b12, &a[
 		a_offset], lda, &b[b_offset], ldb);
 /*        Apply row interchanges to the solution vectors. */
 	dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1);
    }
    return 0;
 /*     End of DGETRS */
 } /* dgetrs_ */
--- a/lapack-netlib/SRC/dgetsls.c
+++ b/lapack-netlib/SRC/dgetsls.c
@@ -0,0 +1,931 @@
 /* f2c.h  --  Standard Fortran to C header file */
 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;
 #define TRUE_ (1)
 #define FALSE_ (0)
 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif
 /* I/O stuff */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;
 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;
 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;
 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;
 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;
 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;
 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;
 #define VOID void
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};
 typedef union Multitype Multitype;
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
 /* procedure parameter types for -A and -C++ */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) n = -n, x = 1/x;
 		for(u = n; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */
 /* Table of constant values */
 static integer c_n1 = -1;
 static integer c_n2 = -2;
 static doublereal c_b23 = 0.;
 static integer c__0 = 0;
 /* > \brief \b DGETSLS */
 /*  Definition: */
 /*  =========== */
 /*       SUBROUTINE DGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, */
 /*     $                     WORK, LWORK, INFO ) */
 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS */
 /*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( * ) */
 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > DGETSLS solves overdetermined or underdetermined real linear systems */
 /* > involving an M-by-N matrix A, using a tall skinny QR or short wide LQ */
 /* > factorization of A.  It is assumed that A has full rank. */
 /* > */
 /* > */
 /* > */
 /* > The following options are provided: */
 /* > */
 /* > 1. If TRANS = 'N' and m >= n:  find the least squares solution of */
 /* >    an overdetermined system, i.e., solve the least squares problem */
 /* >                 minimize || B - A*X ||. */
 /* > */
 /* > 2. If TRANS = 'N' and m < n:  find the minimum norm solution of */
 /* >    an underdetermined system A * X = B. */
 /* > */
 /* > 3. If TRANS = 'T' and m >= n:  find the minimum norm solution of */
 /* >    an undetermined system A**T * X = B. */
 /* > */
 /* > 4. If TRANS = 'T' and m < n:  find the least squares solution of */
 /* >    an overdetermined system, i.e., solve the least squares problem */
 /* >                 minimize || B - A**T * X ||. */
 /* > */
 /* > Several right hand side vectors b and solution vectors x can be */
 /* > handled in a single call; they are stored as the columns of the */
 /* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
 /* > matrix X. */
 /* > \endverbatim */
 /*  Arguments: */
 /*  ========== */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          = 'N': the linear system involves A; */
 /* >          = 'T': the linear system involves A**T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of */
 /* >          columns of the matrices B and X. NRHS >=0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, */
 /* >          A is overwritten by details of its QR or LQ */
 /* >          factorization as returned by DGEQR or DGELQ. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
 /* >          On entry, the matrix B of right hand side vectors, stored */
 /* >          columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */
 /* >          if TRANS = 'T'. */
 /* >          On exit, if INFO = 0, B is overwritten by the solution */
 /* >          vectors, stored columnwise: */
 /* >          if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */
 /* >          squares solution vectors. */
 /* >          if TRANS = 'N' and m < n, rows 1 to N of B contain the */
 /* >          minimum norm solution vectors; */
 /* >          if TRANS = 'T' and m >= n, rows 1 to M of B contain the */
 /* >          minimum norm solution vectors; */
 /* >          if TRANS = 'T' and m < n, rows 1 to M of B contain the */
 /* >          least squares solution vectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= MAX(1,M,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */
 /* >          or optimal, if query was assumed) LWORK. */
 /* >          See LWORK for details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If LWORK = -1 or -2, then a workspace query is assumed. */
 /* >          If LWORK = -1, the routine calculates optimal size of WORK for the */
 /* >          optimal performance and returns this value in WORK(1). */
 /* >          If LWORK = -2, the routine calculates minimal size of WORK and */
 /* >          returns this value in WORK(1). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO =  i, the i-th diagonal element of the */
 /* >                triangular factor of A is zero, so that A does not have */
 /* >                full rank; the least squares solution could not be */
 /* >                computed. */
 /* > \endverbatim */
 /*  Authors: */
 /*  ======== */
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 /* > \date June 2017 */
 /* > \ingroup doubleGEsolve */
 /*  ===================================================================== */
 /* Subroutine */ int dgetsls_(char *trans, integer *m, integer *n, integer *
 	nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, 
 	doublereal *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
    /* Local variables */
    doublereal anrm, bnrm;
    logical tran;
    integer brow, tszm, tszo, info2, i__, j, iascl, ibscl;
    extern /* Subroutine */ int dgelq_(integer *, integer *, doublereal *, 
 	    integer *, doublereal *, integer *, doublereal *, integer *, 
 	    integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dgeqr_(integer *, integer *, doublereal *, 
 	    integer *, doublereal *, integer *, doublereal *, integer *, 
 	    integer *);
    integer minmn, maxmn;
    doublereal workq[1];
    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
 	    integer *, doublereal *, integer *, doublereal *);
    extern /* Subroutine */ int dlascl_(char *, integer *, integer *, 
 	    doublereal *, doublereal *, integer *, integer *, doublereal *, 
 	    integer *, integer *);
    doublereal tq[5];
    extern /* Subroutine */ int dgemlq_(char *, char *, integer *, integer *, 
 	    integer *, doublereal *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *, doublereal *, integer *, integer *), dlaset_(char *, integer *, integer *, doublereal 
 	    *, doublereal *, doublereal *, integer *), xerbla_(char *,
 	     integer *, ftnlen), dgemqr_(char *, char *, integer *, integer *,
 	     integer *, doublereal *, integer *, doublereal *, integer *, 
 	    doublereal *, integer *, doublereal *, integer *, integer *);
    integer scllen;
    doublereal bignum, smlnum;
    integer wsizem, wsizeo;
    logical lquery;
    extern /* Subroutine */ int dtrtrs_(char *, char *, char *, integer *, 
 	    integer *, doublereal *, integer *, doublereal *, integer *, 
 	    integer *);
    integer lw1, lw2, mnk, lwm, lwo;
 /*  -- LAPACK driver routine (version 3.7.1) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2017 */
 /*  ===================================================================== */
 /*     Test the input arguments. */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;
    /* Function Body */
    *info = 0;
    minmn = f2cmin(*m,*n);
    maxmn = f2cmax(*m,*n);
    mnk = f2cmax(minmn,*nrhs);
    tran = lsame_(trans, "T");
    lquery = *lwork == -1 || *lwork == -2;
    if (! (lsame_(trans, "N") || lsame_(trans, "T"))) {
 	*info = -1;
    } else if (*m < 0) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*nrhs < 0) {
 	*info = -4;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -6;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = f2cmax(1,*m);
 	if (*ldb < f2cmax(i__1,*n)) {
 	    *info = -8;
 	}
    }
    if (*info == 0) {
 /*     Determine the block size and minimum LWORK */
 	if (*m >= *n) {
 	    dgeqr_(m, n, &a[a_offset], lda, tq, &c_n1, workq, &c_n1, &info2);
 	    tszo = (integer) tq[0];
 	    lwo = (integer) workq[0];
 	    dgemqr_("L", trans, m, nrhs, n, &a[a_offset], lda, tq, &tszo, &b[
 		    b_offset], ldb, workq, &c_n1, &info2);
 /* Computing MAX */
 	    i__1 = lwo, i__2 = (integer) workq[0];
 	    lwo = f2cmax(i__1,i__2);
 	    dgeqr_(m, n, &a[a_offset], lda, tq, &c_n2, workq, &c_n2, &info2);
 	    tszm = (integer) tq[0];
 	    lwm = (integer) workq[0];
 	    dgemqr_("L", trans, m, nrhs, n, &a[a_offset], lda, tq, &tszm, &b[
 		    b_offset], ldb, workq, &c_n1, &info2);
 /* Computing MAX */
 	    i__1 = lwm, i__2 = (integer) workq[0];
 	    lwm = f2cmax(i__1,i__2);
 	    wsizeo = tszo + lwo;
 	    wsizem = tszm + lwm;
 	} else {
 	    dgelq_(m, n, &a[a_offset], lda, tq, &c_n1, workq, &c_n1, &info2);
 	    tszo = (integer) tq[0];
 	    lwo = (integer) workq[0];
 	    dgemlq_("L", trans, n, nrhs, m, &a[a_offset], lda, tq, &tszo, &b[
 		    b_offset], ldb, workq, &c_n1, &info2);
 /* Computing MAX */
 	    i__1 = lwo, i__2 = (integer) workq[0];
 	    lwo = f2cmax(i__1,i__2);
 	    dgelq_(m, n, &a[a_offset], lda, tq, &c_n2, workq, &c_n2, &info2);
 	    tszm = (integer) tq[0];
 	    lwm = (integer) workq[0];
 	    dgemlq_("L", trans, n, nrhs, m, &a[a_offset], lda, tq, &tszm, &b[
 		    b_offset], ldb, workq, &c_n1, &info2);
 /* Computing MAX */
 	    i__1 = lwm, i__2 = (integer) workq[0];
 	    lwm = f2cmax(i__1,i__2);
 	    wsizeo = tszo + lwo;
 	    wsizem = tszm + lwm;
 	}
 	if (*lwork < wsizem && ! lquery) {
 	    *info = -10;
 	}
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("DGETSLS", &i__1, (ftnlen)7);
 	work[1] = (doublereal) wsizeo;
 	return 0;
    }
    if (lquery) {
 	if (*lwork == -1) {
 	    work[1] = (real) wsizeo;
 	}
 	if (*lwork == -2) {
 	    work[1] = (real) wsizem;
 	}
 	return 0;
    }
    if (*lwork < wsizeo) {
 	lw1 = tszm;
 	lw2 = lwm;
    } else {
 	lw1 = tszo;
 	lw2 = lwo;
    }
 /*     Quick return if possible */
 /* Computing MIN */
    i__1 = f2cmin(*m,*n);
    if (f2cmin(i__1,*nrhs) == 0) {
 	i__1 = f2cmax(*m,*n);
 	dlaset_("FULL", &i__1, nrhs, &c_b23, &c_b23, &b[b_offset], ldb);
 	return 0;
    }
 /*     Get machine parameters */
    smlnum = dlamch_("S") / dlamch_("P");
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);
 /*     Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */
    anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]);
    iascl = 0;
    if (anrm > 0. && anrm < smlnum) {
 /*        Scale matrix norm up to SMLNUM */
 	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
 		info);
 	iascl = 1;
    } else if (anrm > bignum) {
 /*        Scale matrix norm down to BIGNUM */
 	dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
 		info);
 	iascl = 2;
    } else if (anrm == 0.) {
 /*        Matrix all zero. Return zero solution. */
 	dlaset_("F", &maxmn, nrhs, &c_b23, &c_b23, &b[b_offset], ldb);
 	goto L50;
    }
    brow = *m;
    if (tran) {
 	brow = *n;
    }
    bnrm = dlange_("M", &brow, nrhs, &b[b_offset], ldb, &work[1]);
    ibscl = 0;
    if (bnrm > 0. && bnrm < smlnum) {
 /*        Scale matrix norm up to SMLNUM */
 	dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], 
 		ldb, info);
 	ibscl = 1;
    } else if (bnrm > bignum) {
 /*        Scale matrix norm down to BIGNUM */
 	dlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], 
 		ldb, info);
 	ibscl = 2;
    }
    if (*m >= *n) {
 /*        compute QR factorization of A */
 	dgeqr_(m, n, &a[a_offset], lda, &work[lw2 + 1], &lw1, &work[1], &lw2, 
 		info);
 	if (! tran) {
 /*           Least-Squares Problem f2cmin || A * X - B || */
 /*           B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */
 	    dgemqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[lw2 + 1], &
 		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);
 /*           B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */
 	    dtrtrs_("U", "N", "N", n, nrhs, &a[a_offset], lda, &b[b_offset], 
 		    ldb, info);
 	    if (*info > 0) {
 		return 0;
 	    }
 	    scllen = *n;
 	} else {
 /*           Overdetermined system of equations A**T * X = B */
 /*           B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */
 	    dtrtrs_("U", "T", "N", n, nrhs, &a[a_offset], lda, &b[b_offset], 
 		    ldb, info);
 	    if (*info > 0) {
 		return 0;
 	    }
 /*           B(N+1:M,1:NRHS) = ZERO */
 	    i__1 = *nrhs;
 	    for (j = 1; j <= i__1; ++j) {
 		i__2 = *m;
 		for (i__ = *n + 1; i__ <= i__2; ++i__) {
 		    b[i__ + j * b_dim1] = 0.;
 /* L10: */
 		}
 /* L20: */
 	    }
 /*           B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */
 	    dgemqr_("L", "N", m, nrhs, n, &a[a_offset], lda, &work[lw2 + 1], &
 		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);
 	    scllen = *m;
 	}
    } else {
 /*        Compute LQ factorization of A */
 	dgelq_(m, n, &a[a_offset], lda, &work[lw2 + 1], &lw1, &work[1], &lw2, 
 		info);
 /*        workspace at least M, optimally M*NB. */
 	if (! tran) {
 /*           underdetermined system of equations A * X = B */
 /*           B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */
 	    dtrtrs_("L", "N", "N", m, nrhs, &a[a_offset], lda, &b[b_offset], 
 		    ldb, info);
 	    if (*info > 0) {
 		return 0;
 	    }
 /*           B(M+1:N,1:NRHS) = 0 */
 	    i__1 = *nrhs;
 	    for (j = 1; j <= i__1; ++j) {
 		i__2 = *n;
 		for (i__ = *m + 1; i__ <= i__2; ++i__) {
 		    b[i__ + j * b_dim1] = 0.;
 /* L30: */
 		}
 /* L40: */
 	    }
 /*           B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) */
 	    dgemlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[lw2 + 1], &
 		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);
 /*           workspace at least NRHS, optimally NRHS*NB */
 	    scllen = *n;
 	} else {
 /*           overdetermined system f2cmin || A**T * X - B || */
 /*           B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */
 	    dgemlq_("L", "N", n, nrhs, m, &a[a_offset], lda, &work[lw2 + 1], &
 		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);
 /*           workspace at least NRHS, optimally NRHS*NB */
 /*           B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */
 	    dtrtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], 
 		    lda, &b[b_offset], ldb, info);
 	    if (*info > 0) {
 		return 0;
 	    }
 	    scllen = *m;
 	}
    }
 /*     Undo scaling */
    if (iascl == 1) {
 	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
    } else if (iascl == 2) {
 	dlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
    }
    if (ibscl == 1) {
 	dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
    } else if (ibscl == 2) {
 	dlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
    }
 L50:
    work[1] = (doublereal) (tszo + lwo);
    return 0;
 /*     End of DGETSLS */
 } /* dgetsls_ */