Add C versions as fallback

4 years ago · 5d23defa91
--- a/lapack-netlib/SRC/sgbtrs.c
+++ b/lapack-netlib/SRC/sgbtrs.c
@@ -0,0 +1,684 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar type names used by the translated Fortran code in this file:
    `integer` is int, `real` is float and `doublereal` is double. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex values are plain structs: r is the real part, i the imaginary
    part (see Cf/Cd, which combine them as r + i*_Complex_I). */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd: build a C99 complex value from the r and i members of *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd: reinterpret the struct pointer as a pointer to a C99 complex
    by a plain cast.  NOTE(review): this assumes the {r, i} struct and the
    _Complex type share layout and may legally alias -- confirm for the
    compilers and flags in use. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf / pCd: lvalue form of the casts above; the complex helper macros in
    this header assign their results through it. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Boolean and small-integer type names; `logical` holds TRUE_ / FALSE_. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Truth values stored in `logical` variables. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 /* Defaults to plain `extern` unless the includer defined it first. */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer aliases used by the I/O descriptor structs below; ftnlen is also
    the type of the length fields and string-length arguments. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 /* Descriptor record for external READ/WRITE statements.  Presumably kept
    for compatibility with libf2c's f2c.h; nothing in the visible part of
    this file uses it. */
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 /* Descriptor record for internal READ/WRITE statements; here the unit
    field (iciunit) is a char pointer rather than a unit number.  Not used
    in the visible part of this file. */
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 /* Descriptor record for the OPEN statement.  ofnm/ofnmlen pair a string
    pointer with its length; the remaining char pointers presumably carry
    the OPEN specifiers -- not used in the visible part of this file. */
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 /* Descriptor record for the CLOSE statement.  Not used in the visible
    part of this file. */
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 /* Descriptor record shared by REWIND, BACKSPACE and ENDFILE: an error
    flag and a unit number.  Not used in the visible part of this file. */
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Descriptor record for the INQUIRE statement.  Most string results are
    declared as a char pointer followed by a length field for that string.
    NOTE(review): informlen is declared ftnint while the other length
    fields are ftnlen; both are typedefs of int in this header, so layout
    is unaffected.  Not used in the visible part of this file. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 /* Spelling of `void` used by f2c-generated code. */
 #define VOID void

 /* Union able to hold one value of any of the scalar/complex types
    declared above. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Describes one variable of a NAMELIST group: its name, address,
    a pointer to dimension data and an integer type code.  The encoding of
    `dims` and `type` is not visible in this file. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A NAMELIST group: its name plus an array of nvars pointers to the
    Vardesc records of its members. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Generic numeric helpers.  These are function-like macros, so an argument
    may be evaluated more than once -- do not pass expressions with side
    effects.  Note that `abs` replaces the <stdlib.h> function of the same
    name for the rest of the translation unit. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit helpers: b is the bit position counted from the least significant
    bit. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro replacements for libf2c run-time routines.  Arguments are
    pointers, as in the libf2c calling convention.  The brace-wrapped
    macros are statements that store their result through the first
    argument; the parenthesised ones are expressions.  Arguments may be
    evaluated more than once. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 /* Single-precision complex helpers.
    NOTE(review): c_cos calls the double-precision ccos while its siblings
    use the float variants (cexpf, clogf, csinf, csqrtf). */
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 /* Real helpers taking pointers to their operands. */
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 /* NOTE(review): r_cnjg and r_imag use the double-precision conj/cimag on
    a float complex value. */
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* Positive difference: a - b when a > b, otherwise 0. */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* Truncation toward zero built from floor(). */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* Base-10 logarithm: natural log scaled by the constant log10(e). */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* Round to nearest, halves away from zero; u_nint takes a value, the
    d_/i_ wrappers take a pointer. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* Sign transfer: |a| when b >= 0, otherwise -|a|. */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 /* Integer helpers. */
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Powers: pow_dd is real**real; the others raise to an integer exponent
    through the static *pow_ui helpers defined later in this header. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* Complex ** complex: stores A raised to the power B into *R.  All three
    arguments are doublecomplex pointers; both operands are converted to
    C99 complex with Cd() before calling cpow (the exponent used to be
    passed as the raw struct, which cannot be converted). */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 /* Character helpers.
    s_cat: concatenates *np source strings rpp[i] (lengths rnp[i]) into lpp,
    never writing more than llp characters, then fills what is left of the
    llp characters with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the first min(c,d) characters only; anything beyond
    the shorter length is ignored. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copies at most min(C,D) characters from B to A and stops early
    at a NUL in B; A is neither blank-padded nor NUL-terminated. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* sig_die / s_stop ignore their arguments and terminate the process with
    exit status 1 and 0 respectively. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Version tag string; not referenced in the visible part of this file. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 /* Double-precision complex helpers (pointer arguments). */
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control statements; the trailing semicolon is part of the macro. */
 #define myexit_() break;
 #define mycycle() continue;
 /* Expression macros.  They were previously wrapped in braces ("{ceil(w)}"),
    which is neither an expression nor a well-formed statement, so any use
    failed to compile.
    myceiling(w): smallest integral value not less than w, as a double.
    myhuge(w):    HUGE_VAL; the argument is ignored.
    mymaxloc(w,s,e,n): 1-based position, relative to *s, of the largest of
                  w[*s-1 .. *e-1]; forwards to the double-precision
                  dmaxloc_ helper defined later in this header. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 #define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning `logical`.  The parameter list is
    left unspecified: `(...)` when compiled as C++, an empty (unprototyped)
    list when compiled as C. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Returns x raised to the integer power n (single precision) by binary
    exponentiation.  n == 0 gives 1; for n < 0 the reciprocal 1/x is raised
    to |n|.  |n| is formed in unsigned arithmetic so that the most negative
    integer no longer triggers signed overflow. */
 static float spow_ui(float x, integer n) {
 	float result=1.0; unsigned long int u;
 	if(n == 0) return result;
 	if(n < 0) {
 		x = 1/x;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for(;;) {
 		if(u & 01) result *= x;	/* multiply in the current bit's power */
 		if(u >>= 1) x *= x;	/* square only while bits remain */
 		else break;
 	}
 	return result;
 }
 /* Returns x raised to the integer power n (double precision) by binary
    exponentiation.  n == 0 gives 1; for n < 0 the reciprocal 1/x is raised
    to |n|.  |n| is formed in unsigned arithmetic so that the most negative
    integer no longer triggers signed overflow. */
 static double dpow_ui(double x, integer n) {
 	double result=1.0; unsigned long int u;
 	if(n == 0) return result;
 	if(n < 0) {
 		x = 1/x;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for(;;) {
 		if(u & 01) result *= x;	/* multiply in the current bit's power */
 		if(u >>= 1) x *= x;	/* square only while bits remain */
 		else break;
 	}
 	return result;
 }
 /* Returns the single-precision complex x raised to the integer power n by
    binary exponentiation.  n == 0 gives 1; for n < 0 the reciprocal 1/x is
    raised to |n|.  |n| is formed in unsigned arithmetic so that the most
    negative integer no longer triggers signed overflow. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result=1.0; unsigned long int u;
 	if(n == 0) return result;
 	if(n < 0) {
 		x = 1/x;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for(;;) {
 		if(u & 01) result *= x;	/* multiply in the current bit's power */
 		if(u >>= 1) x *= x;	/* square only while bits remain */
 		else break;
 	}
 	return result;
 }
 /* Returns the double-precision complex x raised to the integer power n by
    binary exponentiation.  n == 0 gives 1; for n < 0 the reciprocal 1/x is
    raised to |n|.  |n| is formed in unsigned arithmetic so that the most
    negative integer no longer triggers signed overflow. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result=1.0; unsigned long int u;
 	if(n == 0) return result;
 	if(n < 0) {
 		x = 1/x;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for(;;) {
 		if(u & 01) result *= x;	/* multiply in the current bit's power */
 		if(u >>= 1) x *= x;	/* square only while bits remain */
 		else break;
 	}
 	return result;
 }
 /* Integer power x**n.
      n == 0 or x == 1   -> 1
      n < 0, x == -1     -> (-1)**|n|
      n < 0, x == 0      -> evaluates 1/x, exactly as before
      n < 0, otherwise   -> 0 (the true result has magnitude below 1)
      n > 0              -> binary exponentiation
    |n| is formed in unsigned arithmetic so that the most negative integer
    no longer triggers signed overflow on the x == -1 path.
    NOTE(review): the 0**negative case still divides by zero, apparently on
    purpose to raise an arithmetic fault; that is undefined behaviour in
    ISO C -- decide whether a defined result is preferable.  Results that
    do not fit in `integer` overflow, as in the original. */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1; unsigned long int u;
 	if (n == 0 || x == 1) return 1;
 	if (n < 0) {
 		if (x != -1) return x == 0 ? 1/x : 0;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for(;;) {
 		if(u & 01) result *= x;
 		if(u >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Scans the 1-based range w(s) .. w(e) and returns the position of its
    largest element counted from s (so the answer is 1 when w(s) is the
    maximum or when e <= s).  The first of several equal maxima wins.
    The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double top = w[s-1];
 	integer k;
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Single-precision counterpart of dmaxloc_: scans the 1-based range
    w(s) .. w(e) and returns the position of its largest element counted
    from s (1 when w(s) is the maximum or when e <= s).  The first of
    several equal maxima wins.  The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float top = w[s-1];
 	integer k;
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product (single-precision complex): stores
    sum over k of conj(x_k) * y_k, for *n_ elements, into *z.
    x is read with stride *incx_ and y with stride *incy_.  A negative
    stride walks its vector backwards starting at offset (1-n)*inc, the
    reference BLAS convention; previously a negative stride indexed memory
    in front of the array.  For n <= 0 the result is 0. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
 		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product (double-precision complex): stores
    sum over k of conj(x_k) * y_k, for *n_ elements, into *z.
    x is read with stride *incx_ and y with stride *incy_.  A negative
    stride walks its vector backwards starting at offset (1-n)*inc, the
    reference BLAS convention; previously a negative stride indexed memory
    in front of the array.  For n <= 0 the result is 0. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
 		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated dot product (single-precision complex): stores
    sum over k of x_k * y_k, for *n_ elements, into *z.
    x is read with stride *incx_ and y with stride *incy_.  A negative
    stride walks its vector backwards starting at offset (1-n)*inc, the
    reference BLAS convention; previously a negative stride indexed memory
    in front of the array.  For n <= 0 the result is 0. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i), no conjugation */
 		zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = zdotc;
 }
 /* Unconjugated dot product (double-precision complex): stores
    sum over k of x_k * y_k, for *n_ elements, into *z.
    x is read with stride *incx_ and y with stride *incy_.  A negative
    stride walks its vector backwards starting at offset (1-n)*inc, the
    reference BLAS convention; previously a negative stride indexed memory
    in front of the array.  For n <= 0 the result is 0. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i), no conjugation */
 		zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* Constants passed by address to the BLAS routines called from sgbtrs_:
    c_b7  (-1) is the scalar handed to sger_ and sgemv_,
    c__1  ( 1) is the unit increment argument,
    c_b23 ( 1) is the second scalar handed to sgemv_. */
 static real c_b7 = -1.f;
 static integer c__1 = 1;
 static real c_b23 = 1.f;

 /* > \brief \b SGBTRS */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGBTRS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgbtrs.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgbtrs.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgbtrs.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, */
 /*                          INFO ) */

 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, KL, KU, LDAB, LDB, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       REAL               AB( LDAB, * ), B( LDB, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGBTRS solves a system of linear equations */
 /* >    A * X = B  or  A**T * X = B */
 /* > with a general band matrix A using the LU factorization computed */
 /* > by SGBTRF. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          Specifies the form of the system of equations. */
 /* >          = 'N':  A * X = B  (No transpose) */
 /* >          = 'T':  A**T* X = B  (Transpose) */
 /* >          = 'C':  A**T* X = B  (Conjugate transpose = Transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KL */
 /* > \verbatim */
 /* >          KL is INTEGER */
 /* >          The number of subdiagonals within the band of A.  KL >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KU */
 /* > \verbatim */
 /* >          KU is INTEGER */
 /* >          The number of superdiagonals within the band of A.  KU >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AB */
 /* > \verbatim */
 /* >          AB is REAL array, dimension (LDAB,N) */
 /* >          Details of the LU factorization of the band matrix A, as */
 /* >          computed by SGBTRF.  U is stored as an upper triangular band */
 /* >          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */
 /* >          the multipliers used during the factorization are stored in */
 /* >          rows KL+KU+2 to 2*KL+KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices; for 1 <= i <= N, row i of the matrix was */
 /* >          interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,NRHS) */
 /* >          On entry, the right hand side matrix B. */
 /* >          On exit, the solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGBcomputational */

 /*  ===================================================================== */
 /* SGBTRS: solves A*X = B or A**T*X = B for a general band matrix A using
    the LU factors held in AB and the pivot indices in IPIV, overwriting B
    with X.  All arguments are passed by address (Fortran convention).
    *info is set to 0 on success or to -i when the i-th argument is
    invalid, in which case xerbla_ is called and nothing is computed.  The
    function itself always returns 0.

    Arrays are column-major and addressed here with explicit 0-based
    offsets: element (r,c), 1-based, of a matrix with leading dimension ld
    lives at offset (r-1) + (c-1)*ld.  The earlier translation instead
    shifted the ab, ipiv and b pointers to before the start of their
    arrays, which is undefined behaviour in C; the addresses handed to the
    BLAS routines are unchanged. */
 /* Subroutine */ int sgbtrs_(char *trans, integer *n, integer *kl, integer *
 	ku, integer *nrhs, real *ab, integer *ldab, integer *ipiv, real *b, 
 	integer *ldb, integer *info)
 {
     /* External routines */
     extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 
 	    integer *, real *, integer *, real *, integer *);
     extern logical lsame_(char *, char *);
     extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
 	    real *, integer *, real *, integer *, real *, real *, integer *);
     extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
 	    integer *), stbsv_(char *, char *, char *, integer *, integer *, 
 	    real *, integer *, real *, integer *);
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);

     /* Local variables */
     integer j, col, piv, lm, kd, bw, last, ncols, errpos;
     logical lnoti, notran;
     real *lcol, *row_j, *row_piv;

     /* Test the input parameters. */
     *info = 0;
     notran = lsame_(trans, "N");
     if (! notran && ! lsame_(trans, "T") && ! lsame_(
 	    trans, "C")) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*kl < 0) {
 	*info = -3;
     } else if (*ku < 0) {
 	*info = -4;
     } else if (*nrhs < 0) {
 	*info = -5;
     } else if (*ldab < 2 * *kl + *ku + 1) {
 	*info = -7;
     } else if (*ldb < f2cmax(1,*n)) {
 	*info = -10;
     }
     if (*info != 0) {
 	errpos = -(*info);
 	xerbla_("SGBTRS", &errpos, (ftnlen)6);
 	return 0;
     }

     /* Quick return if possible */
     if (*n == 0 || *nrhs == 0) {
 	return 0;
     }

     /* 0-based row offset of the first multiplier in each column of AB
        (1-based row KL+KU+2). */
     kd = *ku + *kl + 1;
     /* With KL == 0 there are no multipliers or interchanges to apply. */
     lnoti = *kl > 0;

     if (notran) {

 	/* Solve  A*X = B. */

 	/* Solve L*X = B, overwriting B with X.  L is a product of */
 	/* permutations and unit lower triangular matrices */
 	/* L = P(1) * L(1) * ... * P(n-1) * L(n-1), where each L(i) is a */
 	/* rank-one modification of the identity matrix. */
 	if (lnoti) {
 	    last = *n - 1;
 	    for (j = 1; j <= last; ++j) {
 		lm = f2cmin(*kl, *n - j);
 		piv = ipiv[j - 1];
 		row_j = b + (j - 1);
 		if (piv != j) {
 		    row_piv = b + (piv - 1);
 		    sswap_(nrhs, row_piv, ldb, row_j, ldb);
 		}
 		/* Rank-one update of rows j+1 .. j+lm of B. */
 		lcol = ab + kd + (j - 1) * *ldab;
 		sger_(&lm, nrhs, &c_b7, lcol, &c__1, row_j, ldb, b + j, ldb);
 	    }
 	}

 	/* Solve U*X = B, overwriting B with X, one right-hand side at a */
 	/* time. */
 	ncols = *nrhs;
 	for (col = 1; col <= ncols; ++col) {
 	    bw = *kl + *ku;
 	    stbsv_("Upper", "No transpose", "Non-unit", n, &bw, ab, ldab,
 		    b + (col - 1) * *ldb, &c__1);
 	}

     } else {

 	/* Solve A**T*X = B. */

 	/* Solve U**T*X = B, overwriting B with X, one right-hand side at */
 	/* a time. */
 	ncols = *nrhs;
 	for (col = 1; col <= ncols; ++col) {
 	    bw = *kl + *ku;
 	    stbsv_("Upper", "Transpose", "Non-unit", n, &bw, ab, ldab,
 		    b + (col - 1) * *ldb, &c__1);
 	}

 	/* Solve L**T*X = B, overwriting B with X: undo the elimination */
 	/* steps in reverse order. */
 	if (lnoti) {
 	    for (j = *n - 1; j >= 1; --j) {
 		lm = f2cmin(*kl, *n - j);
 		row_j = b + (j - 1);
 		lcol = ab + kd + (j - 1) * *ldab;
 		sgemv_("Transpose", &lm, nrhs, &c_b7, b + j, ldb, lcol,
 			&c__1, &c_b23, row_j, ldb);
 		piv = ipiv[j - 1];
 		if (piv != j) {
 		    row_piv = b + (piv - 1);
 		    sswap_(nrhs, row_piv, ldb, row_j, ldb);
 		}
 	    }
 	}
     }
     return 0;

 /*     End of SGBTRS */

 } /* sgbtrs_ */

--- a/lapack-netlib/SRC/sgebak.c
+++ b/lapack-netlib/SRC/sgebak.c
@@ -0,0 +1,674 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar type names used by the translated Fortran code in this file:
    `integer` is int, `real` is float and `doublereal` is double. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex values are plain structs: r is the real part, i the imaginary
    part (see Cf/Cd, which combine them as r + i*_Complex_I). */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd: build a C99 complex value from the r and i members of *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd: reinterpret the struct pointer as a pointer to a C99 complex
    by a plain cast.  NOTE(review): this assumes the {r, i} struct and the
    _Complex type share layout and may legally alias -- confirm for the
    compilers and flags in use. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf / pCd: lvalue form of the casts above; the complex helper macros in
    this header assign their results through it. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Boolean and small-integer type names; `logical` holds TRUE_ / FALSE_. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Truth values stored in `logical` variables. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 /* Defaults to plain `extern` unless the includer defined it first. */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer aliases used by the I/O descriptor structs below; ftnlen is also
    the type of the length fields and string-length arguments. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 /* Descriptor record for external READ/WRITE statements.  Presumably kept
    for compatibility with libf2c's f2c.h; nothing in the visible part of
    this file uses it. */
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 /* Descriptor record for internal READ/WRITE statements; here the unit
    field (iciunit) is a char pointer rather than a unit number.  Not used
    in the visible part of this file. */
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 /* Descriptor record for the OPEN statement.  ofnm/ofnmlen pair a string
    pointer with its length; the remaining char pointers presumably carry
    the OPEN specifiers -- not used in the visible part of this file. */
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 /* Descriptor record for the CLOSE statement.  Not used in the visible
    part of this file. */
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 /* Descriptor record shared by REWIND, BACKSPACE and ENDFILE: an error
    flag and a unit number.  Not used in the visible part of this file. */
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Descriptor record for the INQUIRE statement.  Most string results are
    declared as a char pointer followed by a length field for that string.
    NOTE(review): informlen is declared ftnint while the other length
    fields are ftnlen; both are typedefs of int in this header, so layout
    is unaffected.  Not used in the visible part of this file. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 /* Spelling of `void` used by f2c-generated code. */
 #define VOID void

 /* Union able to hold one value of any of the scalar/complex types
    declared above. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Describes one variable of a NAMELIST group: its name, address,
    a pointer to dimension data and an integer type code.  The encoding of
    `dims` and `type` is not visible in this file. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A NAMELIST group: its name plus an array of nvars pointers to the
    Vardesc records of its members. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Generic numeric helpers.  These are function-like macros, so an argument
    may be evaluated more than once -- do not pass expressions with side
    effects.  Note that `abs` replaces the <stdlib.h> function of the same
    name for the rest of the translation unit. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit helpers: b is the bit position counted from the least significant
    bit. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro replacements for libf2c run-time routines.  Arguments are
    pointers, as in the libf2c calling convention.  The brace-wrapped
    macros are statements that store their result through the first
    argument; the parenthesised ones are expressions.  Arguments may be
    evaluated more than once. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 /* Single-precision complex helpers.
    NOTE(review): c_cos calls the double-precision ccos while its siblings
    use the float variants (cexpf, clogf, csinf, csqrtf). */
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 /* Real helpers taking pointers to their operands. */
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 /* NOTE(review): r_cnjg and r_imag use the double-precision conj/cimag on
    a float complex value. */
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* Positive difference: a - b when a > b, otherwise 0. */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* Truncation toward zero built from floor(). */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* Base-10 logarithm: natural log scaled by the constant log10(e). */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* Round to nearest, halves away from zero; u_nint takes a value, the
    d_/i_ wrappers take a pointer. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* Sign transfer: |a| when b >= 0, otherwise -|a|. */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 /* Integer helpers. */
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Powers: pow_dd is real**real; the others raise to an integer exponent
    through the static *pow_ui helpers defined later in this header. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* Complex ** complex: stores A raised to the power B into *R.  All three
    arguments are doublecomplex pointers; both operands are converted to
    C99 complex with Cd() before calling cpow (the exponent used to be
    passed as the raw struct, which cannot be converted). */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 /* Character helpers.
    s_cat: concatenates *np source strings rpp[i] (lengths rnp[i]) into lpp,
    never writing more than llp characters, then fills what is left of the
    llp characters with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the first min(c,d) characters only; anything beyond
    the shorter length is ignored. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copies at most min(C,D) characters from B to A and stops early
    at a NUL in B; A is neither blank-padded nor NUL-terminated. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* sig_die / s_stop ignore their arguments and terminate the process with
    exit status 1 and 0 respectively. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Version tag string; not referenced in the visible part of this file. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 /* Double-precision complex helpers (pointer arguments). */
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control statements; the trailing semicolon is part of the macro. */
 #define myexit_() break;
 #define mycycle() continue;
 /* Expression macros.  They were previously wrapped in braces ("{ceil(w)}"),
    which is neither an expression nor a well-formed statement, so any use
    failed to compile.
    myceiling(w): smallest integral value not less than w, as a double.
    myhuge(w):    HUGE_VAL; the argument is ignored.
    mymaxloc(w,s,e,n): 1-based position, relative to *s, of the largest of
                  w[*s-1 .. *e-1]; forwards to the double-precision
                  dmaxloc_ helper defined later in this header. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 #define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning `logical`.  The parameter list is
    left unspecified: `(...)` when compiled as C++, an empty (unprototyped)
    list when compiled as C. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Returns x raised to the integer power n (single precision) by binary
    exponentiation.  n == 0 gives 1; for n < 0 the reciprocal 1/x is raised
    to |n|.  |n| is formed in unsigned arithmetic so that the most negative
    integer no longer triggers signed overflow. */
 static float spow_ui(float x, integer n) {
 	float result=1.0; unsigned long int u;
 	if(n == 0) return result;
 	if(n < 0) {
 		x = 1/x;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for(;;) {
 		if(u & 01) result *= x;	/* multiply in the current bit's power */
 		if(u >>= 1) x *= x;	/* square only while bits remain */
 		else break;
 	}
 	return result;
 }
 /* Returns x raised to the integer power n (double precision) by binary
    exponentiation.  n == 0 gives 1; for n < 0 the reciprocal 1/x is raised
    to |n|.  |n| is formed in unsigned arithmetic so that the most negative
    integer no longer triggers signed overflow. */
 static double dpow_ui(double x, integer n) {
 	double result=1.0; unsigned long int u;
 	if(n == 0) return result;
 	if(n < 0) {
 		x = 1/x;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for(;;) {
 		if(u & 01) result *= x;	/* multiply in the current bit's power */
 		if(u >>= 1) x *= x;	/* square only while bits remain */
 		else break;
 	}
 	return result;
 }
 /* Returns the single-precision complex x raised to the integer power n by
    binary exponentiation.  n == 0 gives 1; for n < 0 the reciprocal 1/x is
    raised to |n|.  |n| is formed in unsigned arithmetic so that the most
    negative integer no longer triggers signed overflow. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result=1.0; unsigned long int u;
 	if(n == 0) return result;
 	if(n < 0) {
 		x = 1/x;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for(;;) {
 		if(u & 01) result *= x;	/* multiply in the current bit's power */
 		if(u >>= 1) x *= x;	/* square only while bits remain */
 		else break;
 	}
 	return result;
 }
 /* Returns the double-precision complex x raised to the integer power n by
    binary exponentiation.  n == 0 gives 1; for n < 0 the reciprocal 1/x is
    raised to |n|.  |n| is formed in unsigned arithmetic so that the most
    negative integer no longer triggers signed overflow. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result=1.0; unsigned long int u;
 	if(n == 0) return result;
 	if(n < 0) {
 		x = 1/x;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for(;;) {
 		if(u & 01) result *= x;	/* multiply in the current bit's power */
 		if(u >>= 1) x *= x;	/* square only while bits remain */
 		else break;
 	}
 	return result;
 }
 /* Integer power x**n.
      n == 0 or x == 1   -> 1
      n < 0, x == -1     -> (-1)**|n|
      n < 0, x == 0      -> evaluates 1/x, exactly as before
      n < 0, otherwise   -> 0 (the true result has magnitude below 1)
      n > 0              -> binary exponentiation
    |n| is formed in unsigned arithmetic so that the most negative integer
    no longer triggers signed overflow on the x == -1 path.
    NOTE(review): the 0**negative case still divides by zero, apparently on
    purpose to raise an arithmetic fault; that is undefined behaviour in
    ISO C -- decide whether a defined result is preferable.  Results that
    do not fit in `integer` overflow, as in the original. */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1; unsigned long int u;
 	if (n == 0 || x == 1) return 1;
 	if (n < 0) {
 		if (x != -1) return x == 0 ? 1/x : 0;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for(;;) {
 		if(u & 01) result *= x;
 		if(u >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Scans the 1-based range w(s) .. w(e) and returns the position of its
    largest element counted from s (so the answer is 1 when w(s) is the
    maximum or when e <= s).  The first of several equal maxima wins.
    The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double top = w[s-1];
 	integer k;
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Single-precision counterpart of dmaxloc_: scans the 1-based range
    w(s) .. w(e) and returns the position of its largest element counted
    from s (1 when w(s) is the maximum or when e <= s).  The first of
    several equal maxima wins.  The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float top = w[s-1];
 	integer k;
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product (single-precision complex): stores
    sum over k of conj(x_k) * y_k, for *n_ elements, into *z.
    x is read with stride *incx_ and y with stride *incy_.  A negative
    stride walks its vector backwards starting at offset (1-n)*inc, the
    reference BLAS convention; previously a negative stride indexed memory
    in front of the array.  For n <= 0 the result is 0. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
 		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product (double-precision complex): stores
    sum over k of conj(x_k) * y_k, for *n_ elements, into *z.
    x is read with stride *incx_ and y with stride *incy_.  A negative
    stride walks its vector backwards starting at offset (1-n)*inc, the
    reference BLAS convention; previously a negative stride indexed memory
    in front of the array.  For n <= 0 the result is 0. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
 		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated dot product (single-precision complex): stores
    sum over k of x_k * y_k, for *n_ elements, into *z.
    x is read with stride *incx_ and y with stride *incy_.  A negative
    stride walks its vector backwards starting at offset (1-n)*inc, the
    reference BLAS convention; previously a negative stride indexed memory
    in front of the array.  For n <= 0 the result is 0. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i), no conjugation */
 		zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = zdotc;
 }
 /* Unconjugated dot product (double-precision complex): stores
    sum over k of x_k * y_k, for *n_ elements, into *z.
    x is read with stride *incx_ and y with stride *incy_.  A negative
    stride walks its vector backwards starting at offset (1-n)*inc, the
    reference BLAS convention; previously a negative stride indexed memory
    in front of the array.  For n <= 0 the result is 0. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i), no conjugation */
 		zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SGEBAK */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEBAK + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgebak.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgebak.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgebak.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, */
 /*                          INFO ) */

 /*       CHARACTER          JOB, SIDE */
 /*       INTEGER            IHI, ILO, INFO, LDV, M, N */
 /*       REAL               V( LDV, * ), SCALE( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEBAK forms the right or left eigenvectors of a real general matrix */
 /* > by backward transformation on the computed eigenvectors of the */
 /* > balanced matrix output by SGEBAL. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOB */
 /* > \verbatim */
 /* >          JOB is CHARACTER*1 */
 /* >          Specifies the type of backward transformation required: */
 /* >          = 'N': do nothing, return immediately; */
 /* >          = 'P': do backward transformation for permutation only; */
 /* >          = 'S': do backward transformation for scaling only; */
 /* >          = 'B': do backward transformations for both permutation and */
 /* >                 scaling. */
 /* >          JOB must be the same as the argument JOB supplied to SGEBAL. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] SIDE */
 /* > \verbatim */
 /* >          SIDE is CHARACTER*1 */
 /* >          = 'R':  V contains right eigenvectors; */
 /* >          = 'L':  V contains left eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of rows of the matrix V.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ILO */
 /* > \verbatim */
 /* >          ILO is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IHI */
 /* > \verbatim */
 /* >          IHI is INTEGER */
 /* >          The integers ILO and IHI determined by SGEBAL. */
 /* >          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] SCALE */
 /* > \verbatim */
 /* >          SCALE is REAL array, dimension (N) */
 /* >          Details of the permutation and scaling factors, as returned */
 /* >          by SGEBAL. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of columns of the matrix V.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] V */
 /* > \verbatim */
 /* >          V is REAL array, dimension (LDV,M) */
 /* >          On entry, the matrix of right or left eigenvectors to be */
 /* >          transformed, as returned by SHSEIN or STREVC. */
 /* >          On exit, V is overwritten by the transformed eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDV */
 /* > \verbatim */
 /* >          LDV is INTEGER */
 /* >          The leading dimension of the array V. LDV >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int sgebak_(char *job, char *side, integer *n, integer *ilo, 
 	integer *ihi, real *scale, integer *m, real *v, integer *ldv, integer 
 	*info)
 {
     /* External BLAS / LAPACK helpers */
     extern logical lsame_(char *, char *);
     extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
     extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
 	    integer *);
     extern /* Subroutine */ int xerbla_(char *, integer *,ftnlen);

     /* Locals */
     integer stride, bad_arg, last, pass, row, partner;
     real factor;
     logical right_side, left_side;

 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */

 /*  ===================================================================== */

     /* Shift the array bases so SCALE(i) and V(i,j) can be addressed with */
     /* Fortran-style 1-based subscripts: v[i + j*stride]. */
     --scale;
     stride = *ldv;
     v -= 1 + stride;

     right_side = lsame_(side, "R");
     left_side = lsame_(side, "L");

     /* Argument validation; INFO = -(position of the first bad argument). */
     *info = 0;
     if (! (lsame_(job, "N") || lsame_(job, "P") || lsame_(job, "S") 
 	    || lsame_(job, "B"))) {
 	*info = -1;
     } else if (! (right_side || left_side)) {
 	*info = -2;
     } else if (*n < 0) {
 	*info = -3;
     } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) {
 	*info = -4;
     } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) {
 	*info = -5;
     } else if (*m < 0) {
 	*info = -7;
     } else if (*ldv < f2cmax(1,*n)) {
 	*info = -9;
     }
     if (*info != 0) {
 	bad_arg = -(*info);
 	xerbla_("SGEBAK", &bad_arg,(ftnlen)6);
 	return 0;
     }

     /* Nothing to do for an empty matrix or JOB = 'N'. */
     if (*n == 0 || *m == 0 || lsame_(job, "N")) {
 	return 0;
     }

     /* Backward balance: undo the diagonal scaling on rows ILO..IHI. */
     /* Skipped entirely when ILO == IHI. */
     if (*ilo != *ihi && (lsame_(job, "S") || lsame_(job, "B"))) {

 	if (right_side) {
 	    last = *ihi;
 	    for (row = *ilo; row <= last; ++row) {
 		factor = scale[row];
 		sscal_(m, &factor, &v[row + stride], ldv);
 	    }
 	}

 	if (left_side) {
 	    last = *ihi;
 	    for (row = *ilo; row <= last; ++row) {
 		factor = 1.f / scale[row];
 		sscal_(m, &factor, &v[row + stride], ldv);
 	    }
 	}
     }

     /* Backward permutation: */
     /*     for I = ILO-1 step -1 until 1, */
     /*             IHI+1 step  1 until N, */
     /* swap row I with the row index stored in SCALE(I). */
     if (lsame_(job, "P") || lsame_(job, "B")) {

 	if (right_side) {
 	    last = *n;
 	    for (pass = 1; pass <= last; ++pass) {
 		if (pass >= *ilo && pass <= *ihi) {
 		    continue;	/* rows inside the balanced block were not permuted */
 		}
 		/* Rows below ILO are visited in descending order. */
 		row = (pass < *ilo) ? *ilo - pass : pass;
 		partner = scale[row];	/* SCALE holds the partner index as a real */
 		if (partner != row) {
 		    sswap_(m, &v[row + stride], ldv, &v[partner + stride], ldv);
 		}
 	    }
 	}

 	if (left_side) {
 	    last = *n;
 	    for (pass = 1; pass <= last; ++pass) {
 		if (pass >= *ilo && pass <= *ihi) {
 		    continue;
 		}
 		row = (pass < *ilo) ? *ilo - pass : pass;
 		partner = scale[row];
 		if (partner != row) {
 		    sswap_(m, &v[row + stride], ldv, &v[partner + stride], ldv);
 		}
 	    }
 	}
     }

     return 0;

 /*     End of SGEBAK */

 } /* sgebak_ */

--- a/lapack-netlib/SRC/sgebal.c
+++ b/lapack-netlib/SRC/sgebal.c
@@ -0,0 +1,838 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* C spellings of the Fortran scalar types used by f2c-generated code. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers are stored as (real, imaginary) pairs. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd build a C99 complex VALUE from the struct's r and i members. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd cast the struct pointer to a C99 complex pointer, so that */
 /* pCf(z)/pCd(z) below can be used as assignment targets. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Fortran LOGICAL and the narrow integer/logical variants. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Values stored in a `logical`. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */
 /* Control-block layouts for the Fortran I/O statements named in the */
 /* comment above each struct.  sgebal_ in this translation unit does not */
 /* reference any of them; they are carried along as part of the embedded */
 /* f2c header. */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Most string-valued fields are paired with a length field declared */
 /* immediately after them. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* One member per scalar type declared earlier in this header. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of a single variable inside a Namelist. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* ---- Generic arithmetic helpers.  These are macros: arguments may be */
 /* ---- evaluated more than once, so do not pass expressions with side effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* ---- Replacements for libf2c runtime entry points.  Following the f2c */
 /* ---- calling convention, operands are passed by pointer; complex results */
 /* ---- are written through the first argument. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* Both operands are doublecomplex pointers, so the exponent goes through Cd() */
 /* like the base; dereferencing it directly would hand cpow() a struct. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}

 /* ---- Character helpers. */
 /* s_cat: concatenate *np pieces rpp[i] (lengths rnp[i]) into lpp, truncating */
 /* at llp characters and filling any remainder with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copies at most min(C,D) characters and stops early at a NUL in B; */
 /* the destination is NOT blank-padded. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}

 /* ---- Loop-control and intrinsic shims. */
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning `logical`.  C++ needs the */
 /* variadic spelling; plain C uses an empty parameter list. */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Single-precision x**n for an integer exponent, by repeated squaring. */
 /* A negative n is evaluated as (1/x)**|n|.  |n| is formed in unsigned */
 /* arithmetic so the most negative integer does not overflow on negation. */
 static float spow_ui(float x, integer n) {
 	float result=1.0; unsigned long int u;
 	if(n != 0) {
 		u = (unsigned long int)n;
 		if(n < 0) {
 			u = 0UL - u;	/* magnitude of n, well defined for every n */
 			x = 1/x;
 		}
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return result;
 }
 /* Double-precision x**n for an integer exponent, by repeated squaring. */
 /* A negative n is evaluated as (1/x)**|n|.  |n| is formed in unsigned */
 /* arithmetic so the most negative integer does not overflow on negation. */
 static double dpow_ui(double x, integer n) {
 	double result=1.0; unsigned long int u;
 	if(n != 0) {
 		u = (unsigned long int)n;
 		if(n < 0) {
 			u = 0UL - u;	/* magnitude of n, well defined for every n */
 			x = 1/x;
 		}
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return result;
 }
 /* Single-precision complex x**n for an integer exponent, by repeated */
 /* squaring.  A negative n is evaluated as (1/x)**|n|.  |n| is formed in */
 /* unsigned arithmetic so the most negative integer does not overflow. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result=1.0; unsigned long int u;
 	if(n != 0) {
 		u = (unsigned long int)n;
 		if(n < 0) {
 			u = 0UL - u;	/* magnitude of n, well defined for every n */
 			x = 1/x;
 		}
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return result;
 }
 /* Double-precision complex x**n for an integer exponent, by repeated */
 /* squaring.  A negative n is evaluated as (1/x)**|n|.  |n| is formed in */
 /* unsigned arithmetic so the most negative integer does not overflow. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result=1.0; unsigned long int u;
 	if(n != 0) {
 		u = (unsigned long int)n;
 		if(n < 0) {
 			u = 0UL - u;	/* magnitude of n, well defined for every n */
 			x = 1/x;
 		}
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return result;
 }
 /* Integer power x**n. */
 /*   n == 0 or x == 1 : 1 */
 /*   n <  0, x == -1  : (-1)**|n| */
 /*   n <  0, x ==  0  : evaluates 1/x (integer division by zero, as before) */
 /*   n <  0, otherwise: 0 (the exact result truncates to zero) */
 /*   n >  0           : repeated squaring */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int bits;

 	if (n <= 0) {
 		if (n == 0 || x == 1)
 			return 1;
 		if (x != -1)
 			return x == 0 ? 1/x : 0;
 		n = -n;		/* only x == -1 reaches the squaring loop with n < 0 */
 	}

 	bits = n;
 	for (;;) {
 		if (bits & 01)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return result;
 }
 /* Position of the first maximum of w(s..e) (1-based indices into w), */
 /* returned relative to s, i.e. 1 means w(s).  The n argument is unused. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double peak = w[s - 1];
 	integer pos;

 	for (pos = s + 1; pos <= e; ++pos) {
 		if (w[pos - 1] > peak) {	/* strict '>' keeps the earliest maximum */
 			best = pos;
 			peak = w[pos - 1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the first maximum of w(s..e) (1-based indices into w), */
 /* returned relative to s, i.e. 1 means w(s).  The n argument is unused. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float peak = w[s - 1];
 	integer pos;

 	for (pos = s + 1; pos <= e; ++pos) {
 		if (w[pos - 1] > peak) {	/* strict '>' keeps the earliest maximum */
 			best = pos;
 			peak = w[pos - 1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum over k of conj(x_k) * y_k, k = 1..n, */
 /* single-precision complex.  The result is written through z; n <= 0 */
 /* gives 0.  incx/incy are the element strides; a negative stride walks */
 /* the vector backwards starting from element (n-1)*|inc|, which is the */
 /* reference BLAS convention and keeps every access inside the array. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float acc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* acc = acc + conjg(x(i)) * y(i) */
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product: *z = sum over k of conj(x_k) * y_k, k = 1..n, */
 /* double-precision complex.  The result is written through z; n <= 0 */
 /* gives 0.  incx/incy are the element strides; a negative stride walks */
 /* the vector backwards starting from element (n-1)*|inc|, which is the */
 /* reference BLAS convention and keeps every access inside the array. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double acc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* acc = acc + dconjg(x(i)) * y(i) */
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over k of x_k * y_k, k = 1..n, */
 /* single-precision complex.  The result is written through z; n <= 0 */
 /* gives 0.  incx/incy are the element strides; a negative stride walks */
 /* the vector backwards starting from element (n-1)*|inc|, which is the */
 /* reference BLAS convention and keeps every access inside the array. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float acc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* acc = acc + x(i) * y(i)  (no conjugation) */
 		acc += Cf(&x[ix]) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over k of x_k * y_k, k = 1..n, */
 /* double-precision complex.  The result is written through z; n <= 0 */
 /* gives 0.  incx/incy are the element strides; a negative stride walks */
 /* the vector backwards starting from element (n-1)*|inc|, which is the */
 /* reference BLAS convention and keeps every access inside the array. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double acc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* acc = acc + x(i) * y(i)  (no conjugation) */
 		acc += Cd(&x[ix]) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* The constant 1 in addressable form: sgebal_ passes &c__1 as the */
 /* increment argument of sswap_, snrm2_, isamax_ and sscal_. */
 static integer c__1 = 1;

 /* > \brief \b SGEBAL */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEBAL + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgebal.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgebal.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgebal.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) */

 /*       CHARACTER          JOB */
 /*       INTEGER            IHI, ILO, INFO, LDA, N */
 /*       REAL               A( LDA, * ), SCALE( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEBAL balances a general real matrix A.  This involves, first, */
 /* > permuting A by a similarity transformation to isolate eigenvalues */
 /* > in the first 1 to ILO-1 and last IHI+1 to N elements on the */
 /* > diagonal; and second, applying a diagonal similarity transformation */
 /* > to rows and columns ILO to IHI to make the rows and columns as */
 /* > close in norm as possible.  Both steps are optional. */
 /* > */
 /* > Balancing may reduce the 1-norm of the matrix, and improve the */
 /* > accuracy of the computed eigenvalues and/or eigenvectors. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOB */
 /* > \verbatim */
 /* >          JOB is CHARACTER*1 */
 /* >          Specifies the operations to be performed on A: */
 /* >          = 'N':  none:  simply set ILO = 1, IHI = N, SCALE(I) = 1.0 */
 /* >                  for i = 1,...,N; */
 /* >          = 'P':  permute only; */
 /* >          = 'S':  scale only; */
 /* >          = 'B':  both permute and scale. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the input matrix A. */
 /* >          On exit,  A is overwritten by the balanced matrix. */
 /* >          If JOB = 'N', A is not referenced. */
 /* >          See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] ILO */
 /* > \verbatim */
 /* >          ILO is INTEGER */
 /* > \endverbatim */
 /* > \param[out] IHI */
 /* > \verbatim */
 /* >          IHI is INTEGER */
 /* >          ILO and IHI are set to integers such that on exit */
 /* >          A(i,j) = 0 if i > j and j = 1,...,ILO-1 or i = IHI+1,...,N. */
 /* >          If JOB = 'N' or 'S', ILO = 1 and IHI = N. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] SCALE */
 /* > \verbatim */
 /* >          SCALE is REAL array, dimension (N) */
 /* >          Details of the permutations and scaling factors applied to */
 /* >          A.  If P(j) is the index of the row and column interchanged */
 /* >          with row and column j and D(j) is the scaling factor */
 /* >          applied to row and column j, then */
 /* >          SCALE(j) = P(j)    for j = 1,...,ILO-1 */
 /* >                   = D(j)    for j = ILO,...,IHI */
 /* >                   = P(j)    for j = IHI+1,...,N. */
 /* >          The order in which the interchanges are made is N to IHI+1, */
 /* >          then 1 to ILO-1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The permutations consist of row and column interchanges which put */
 /* >  the matrix in the form */
 /* > */
 /* >             ( T1   X   Y  ) */
 /* >     P A P = (  0   B   Z  ) */
 /* >             (  0   0   T2 ) */
 /* > */
 /* >  where T1 and T2 are upper triangular matrices whose eigenvalues lie */
 /* >  along the diagonal.  The column indices ILO and IHI mark the starting */
 /* >  and ending columns of the submatrix B. Balancing consists of applying */
 /* >  a diagonal similarity transformation inv(D) * B * D to make the */
 /* >  1-norms of each row of B and its corresponding column nearly equal. */
 /* >  The output matrix is */
 /* > */
 /* >     ( T1     X*D          Y    ) */
 /* >     (  0  inv(D)*B*D  inv(D)*Z ). */
 /* >     (  0      0           T2   ) */
 /* > */
 /* >  Information about the permutations P and the diagonal matrix D is */
 /* >  returned in the vector SCALE. */
 /* > */
 /* >  This subroutine is based on the EISPACK routine BALANC. */
 /* > */
 /* >  Modified by Tzu-Yi Chen, Computer Science Division, University of */
 /* >    California at Berkeley, USA */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Balance a general real N-by-N matrix A in place. */
 /*   job   : "N" none, "P" permute only, "S" scale only, "B" both. */
 /*   a,lda : the matrix (column-major) and its leading dimension. */
 /*   ilo,ihi (out): bounds of the block left after isolating eigenvalues. */
 /*   scale (out)  : permutation indices outside ILO..IHI, scaling factors */
 /*                  inside. */
 /*   info (out)   : 0 on success, -i if argument i is illegal; -3 is also */
 /*                  reported when a NaN is met while scaling. */
 /* On an error exit ILO and IHI are not assigned. */
 /* Subroutine */ int sgebal_(char *job, integer *n, real *a, integer *lda, 
 	integer *ilo, integer *ihi, real *scale, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    real r__1, r__2;

    /* Local variables */
    integer iexc;
    extern real snrm2_(integer *, real *, integer *);
    real c__, f, g;
    integer i__, j, k, l, m;
    real r__, s;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
 	    sswap_(integer *, real *, integer *, real *, integer *);
    real sfmin1, sfmin2, sfmax1, sfmax2, ca, ra;
    extern real slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *,ftnlen);
    extern integer isamax_(integer *, real *, integer *);
    extern logical sisnan_(real *);
    logical noconv;
    integer ica, ira;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    /* (shift the bases so a[i + j*a_dim1] and scale[i] are 1-based) */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --scale;

    /* Function Body */
    *info = 0;
    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") 
 	    && ! lsame_(job, "B")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("SGEBAL", &i__1,(ftnlen)6);
 	return 0;
    }

    /* K..L is the still-active block; it shrinks as eigenvalues are isolated. */
    k = 1;
    l = *n;

    if (*n == 0) {
 	goto L210;
    }

    if (lsame_(job, "N")) {
 	i__1 = *n;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    scale[i__] = 1.f;
 /* L10: */
 	}
 	goto L210;
    }

    if (lsame_(job, "S")) {
 	goto L120;
    }

 /*     Permutation to isolate eigenvalues if possible */

    goto L50;

 /*     Row and column exchange. */
 /*     (shared by both searches; IEXC selects where to resume at L30) */

 L20:
    scale[m] = (real) j;
    if (j == m) {
 	goto L30;
    }

    sswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1);
    i__1 = *n - k + 1;
    sswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda);

 L30:
    switch (iexc) {
 	case 1:  goto L40;
 	case 2:  goto L80;
    }

 /*     Search for rows isolating an eigenvalue and push them down. */

 L40:
    if (l == 1) {
 	goto L210;
    }
    --l;

 L50:
    for (j = l; j >= 1; --j) {

 	i__1 = l;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    if (i__ == j) {
 		goto L60;
 	    }
 	    if (a[j + i__ * a_dim1] != 0.f) {
 		goto L70;
 	    }
 L60:
 	    ;
 	}

 	/* Row j has no off-diagonal nonzero in columns 1..L: exchange with L. */
 	m = l;
 	iexc = 1;
 	goto L20;
 L70:
 	;
    }

    goto L90;

 /*     Search for columns isolating an eigenvalue and push them left. */

 L80:
    ++k;

 L90:
    i__1 = l;
    for (j = k; j <= i__1; ++j) {

 	i__2 = l;
 	for (i__ = k; i__ <= i__2; ++i__) {
 	    if (i__ == j) {
 		goto L100;
 	    }
 	    if (a[i__ + j * a_dim1] != 0.f) {
 		goto L110;
 	    }
 L100:
 	    ;
 	}

 	/* Column j has no off-diagonal nonzero in rows K..L: exchange with K. */
 	m = k;
 	iexc = 2;
 	goto L20;
 L110:
 	;
    }

 L120:
    i__1 = l;
    for (i__ = k; i__ <= i__1; ++i__) {
 	scale[i__] = 1.f;
 /* L130: */
    }

    if (lsame_(job, "P")) {
 	goto L210;
    }

 /*     Balance the submatrix in rows K to L. */

 /*     Iterative loop for norm reduction */

    sfmin1 = slamch_("S") / slamch_("P");
    sfmax1 = 1.f / sfmin1;
    sfmin2 = sfmin1 * 2.f;
    sfmax2 = 1.f / sfmin2;
 L140:
    noconv = FALSE_;

    i__1 = l;
    for (i__ = k; i__ <= i__1; ++i__) {

 	/* c / r  : 2-norms of column / row i restricted to the active block */
 	/* ca / ra: largest magnitudes in the parts of column / row i that */
 	/*          the scaling below touches */
 	i__2 = l - k + 1;
 	c__ = snrm2_(&i__2, &a[k + i__ * a_dim1], &c__1);
 	i__2 = l - k + 1;
 	r__ = snrm2_(&i__2, &a[i__ + k * a_dim1], lda);
 	ica = isamax_(&l, &a[i__ * a_dim1 + 1], &c__1);
 	ca = (r__1 = a[ica + i__ * a_dim1], abs(r__1));
 	i__2 = *n - k + 1;
 	ira = isamax_(&i__2, &a[i__ + k * a_dim1], lda);
 	ra = (r__1 = a[i__ + (ira + k - 1) * a_dim1], abs(r__1));

 /*        Guard against zero C or R due to underflow. */

 	if (c__ == 0.f || r__ == 0.f) {
 	    goto L200;
 	}
 	g = r__ / 2.f;
 	f = 1.f;
 	s = c__ + r__;
 L160:
 /* Computing MAX */
 	r__1 = f2cmax(f,c__);
 /* Computing MIN */
 	r__2 = f2cmin(r__,g);
 	if (c__ >= g || f2cmax(r__1,ca) >= sfmax2 || f2cmin(r__2,ra) <= sfmin2) {
 	    goto L170;
 	}
 	r__1 = c__ + f + ca + r__ + g + ra;
 	if (sisnan_(&r__1)) {

 /*           Exit if NaN to avoid infinite loop: with NaN norms every */
 /*           comparison in the exit test above is false, so this loop */
 /*           would otherwise never reach L170. */

 	    *info = -3;
 	    i__2 = -(*info);
 	    xerbla_("SGEBAL", &i__2, (ftnlen)6);
 	    return 0;
 	}
 	f *= 2.f;
 	c__ *= 2.f;
 	ca *= 2.f;
 	r__ /= 2.f;
 	g /= 2.f;
 	ra /= 2.f;
 	goto L160;

 L170:
 	g = c__ / 2.f;
 L180:
 /* Computing MIN */
 	r__1 = f2cmin(f,c__), r__1 = f2cmin(r__1,g);
 	if (g < r__ || f2cmax(r__,ra) >= sfmax2 || f2cmin(r__1,ca) <= sfmin2) {
 	    goto L190;
 	}
 	r__1 = c__ + f + ca + r__ + g + ra;
 	if (sisnan_(&r__1)) {

 /*           Exit if NaN to avoid infinite loop */

 	    *info = -3;
 	    i__2 = -(*info);
 	    xerbla_("SGEBAL", &i__2, (ftnlen)6);
 	    return 0;
 	}
 	f /= 2.f;
 	c__ /= 2.f;
 	g /= 2.f;
 	ca /= 2.f;
 	r__ *= 2.f;
 	ra *= 2.f;
 	goto L180;

 /*        Now balance. */

 L190:
 	/* Skip the update unless it lowers c + r below 95% of its old value. */
 	if (c__ + r__ >= s * .95f) {
 	    goto L200;
 	}
 	/* Skip it as well if the accumulated factor would leave */
 	/* the range (sfmin1, sfmax1). */
 	if (f < 1.f && scale[i__] < 1.f) {
 	    if (f * scale[i__] <= sfmin1) {
 		goto L200;
 	    }
 	}
 	if (f > 1.f && scale[i__] > 1.f) {
 	    if (scale[i__] >= sfmax1 / f) {
 		goto L200;
 	    }
 	}
 	g = 1.f / f;
 	scale[i__] *= f;
 	noconv = TRUE_;

 	/* Row i is multiplied by 1/f, column i by f. */
 	i__2 = *n - k + 1;
 	sscal_(&i__2, &g, &a[i__ + k * a_dim1], lda);
 	sscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1);

 L200:
 	;
    }

    /* Sweep again until a full pass changes nothing. */
    if (noconv) {
 	goto L140;
    }

 L210:
    *ilo = k;
    *ihi = l;

    return 0;

 /*     End of SGEBAL */

 } /* sgebal_ */

--- a/lapack-netlib/SRC/sgebd2.c
+++ b/lapack-netlib/SRC/sgebd2.c
@@ -0,0 +1,744 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Single-precision complex value held as two `real` fields: r (real part) and i (imaginary part). */
 typedef struct { real r, i; } complex;
 /* Double-precision complex value held as two `doublereal` fields: r (real part) and i (imaginary part). */
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd: build a C99 _Complex value from the r and i fields of the struct behind the pointer. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd: return the same address cast to a pointer to the matching _Complex type (no copy is made). */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf / pCd: dereference of the cast pointer, i.e. an lvalue the helper macros below assign through. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Statement macros: expand to a bare `break;` / `continue;` for the enclosing loop. */
 #define myexit_() break;
 #define mycycle() continue;
 /* Value macros: parenthesized (not brace-wrapped) so they are usable in any
    expression context, not only as an initializer.  myhuge ignores its argument. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 /* Forwards to dmaxloc_, dereferencing the start and end positions. */
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /*
  * Raise x to the integer power n by repeated squaring.
  * A negative n computes the |n|-th power of 1/x; n == 0 returns 1.
  */
 static float spow_ui(float x, integer n) {
 	float result = 1.0f;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer in signed arithmetic overflows, which is undefined. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		/* Multiply in the current square when the low exponent bit is set. */
 		if (bits & 01) result *= x;
 		/* Square only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /*
  * Raise x to the integer power n by repeated squaring.
  * A negative n computes the |n|-th power of 1/x; n == 0 returns 1.
  */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer in signed arithmetic overflows, which is undefined. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		/* Multiply in the current square when the low exponent bit is set. */
 		if (bits & 01) result *= x;
 		/* Square only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /*
  * Raise the complex value x to the integer power n by repeated squaring.
  * A negative n computes the |n|-th power of 1/x; n == 0 returns 1.
  */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0f;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer in signed arithmetic overflows, which is undefined. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		/* Multiply in the current square when the low exponent bit is set. */
 		if (bits & 01) result *= x;
 		/* Square only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /*
  * Raise the complex value x to the integer power n by repeated squaring.
  * A negative n computes the |n|-th power of 1/x; n == 0 returns 1.
  */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer in signed arithmetic overflows, which is undefined. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		/* Multiply in the current square when the low exponent bit is set. */
 		if (bits & 01) result *= x;
 		/* Square only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /*
  * Integer power x**n.
  * For n <= 0: n == 0 or x == 1 gives 1; x == -1 falls through to the general
  * loop with the exponent negated; any other x gives 0, except x == 0, which
  * evaluates 1/x (an integer division by zero).
  */
 static integer pow_ii(integer x, integer n) {
 	integer acc;
 	unsigned long int bits;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		n = -n;
 	}
 	/* Square-and-multiply over the bits of the exponent. */
 	bits = n;
 	acc = 1;
 	for (;;) {
 		if (bits & 01) acc *= x;
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return acc;
 }
 /*
  * Position of the largest element among w[s-1] .. w[e-1], counted from 1
  * relative to s.  Ties keep the earliest position (the comparison is strict).
  * The parameter n is not used.
  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double top = w[s-1];
 	integer best = s;
 	integer pos = s + 1;
 	while (pos <= e) {
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 		++pos;
 	}
 	return best - s + 1;
 }
 /*
  * Position of the largest element among w[s-1] .. w[e-1], counted from 1
  * relative to s.  Ties keep the earliest position (the comparison is strict).
  * The parameter n is not used.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float top = w[s-1];
 	integer best = s;
 	integer pos = s + 1;
 	while (pos <= e) {
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 		++pos;
 	}
 	return best - s + 1;
 }
 /*
  * Conjugated dot product: stores sum over k of conj(x[k*incx]) * y[k*incy],
  * k = 0 .. n-1, into *z.
  * NOTE: elements are addressed as k*inc from the base pointer, so a negative
  * increment indexes before x or y.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer count = *n_, sx = *incx_, sy = *incy_;
 	_Complex float acc = 0.0;
 	integer k;
 	if (sx != 1 || sy != 1) {
 		/* general strides */
 		for (k = 0; k < count; ++k)
 			acc += conjf(Cf(&x[k*sx])) * Cf(&y[k*sy]);
 	} else {
 		/* both vectors contiguous */
 		for (k = 0; k < count; ++k)
 			acc += conjf(Cf(&x[k])) * Cf(&y[k]);
 	}
 	pCf(z) = acc;
 }
 /*
  * Conjugated dot product: stores sum over k of conj(x[k*incx]) * y[k*incy],
  * k = 0 .. n-1, into *z.
  * NOTE: elements are addressed as k*inc from the base pointer, so a negative
  * increment indexes before x or y.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer count = *n_, sx = *incx_, sy = *incy_;
 	_Complex double acc = 0.0;
 	integer k;
 	if (sx != 1 || sy != 1) {
 		/* general strides */
 		for (k = 0; k < count; ++k)
 			acc += conj(Cd(&x[k*sx])) * Cd(&y[k*sy]);
 	} else {
 		/* both vectors contiguous */
 		for (k = 0; k < count; ++k)
 			acc += conj(Cd(&x[k])) * Cd(&y[k]);
 	}
 	pCd(z) = acc;
 }
 /*
  * Unconjugated dot product: stores sum over k of x[k*incx] * y[k*incy],
  * k = 0 .. n-1, into *z.
  * NOTE: elements are addressed as k*inc from the base pointer, so a negative
  * increment indexes before x or y.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer count = *n_, sx = *incx_, sy = *incy_;
 	_Complex float acc = 0.0;
 	integer k;
 	if (sx != 1 || sy != 1) {
 		/* general strides */
 		for (k = 0; k < count; ++k)
 			acc += Cf(&x[k*sx]) * Cf(&y[k*sy]);
 	} else {
 		/* both vectors contiguous */
 		for (k = 0; k < count; ++k)
 			acc += Cf(&x[k]) * Cf(&y[k]);
 	}
 	pCf(z) = acc;
 }
 /*
  * Unconjugated dot product: stores sum over k of x[k*incx] * y[k*incy],
  * k = 0 .. n-1, into *z.
  * NOTE: elements are addressed as k*inc from the base pointer, so a negative
  * increment indexes before x or y.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer count = *n_, sx = *incx_, sy = *incy_;
 	_Complex double acc = 0.0;
 	integer k;
 	if (sx != 1 || sy != 1) {
 		/* general strides */
 		for (k = 0; k < count; ++k)
 			acc += Cd(&x[k*sx]) * Cd(&y[k*sy]);
 	} else {
 		/* both vectors contiguous */
 		for (k = 0; k < count; ++k)
 			acc += Cd(&x[k]) * Cd(&y[k]);
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b SGEBD2 reduces a general matrix to bidiagonal form using an unblocked algorithm. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEBD2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgebd2.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgebd2.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgebd2.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) */

 /*       INTEGER            INFO, LDA, M, N */
 /*       REAL               A( LDA, * ), D( * ), E( * ), TAUP( * ), */
 /*      $                   TAUQ( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEBD2 reduces a real general m by n matrix A to upper or lower */
 /* > bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */
 /* > */
 /* > If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows in the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns in the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the m by n general matrix to be reduced. */
 /* >          On exit, */
 /* >          if m >= n, the diagonal and the first superdiagonal are */
 /* >            overwritten with the upper bidiagonal matrix B; the */
 /* >            elements below the diagonal, with the array TAUQ, represent */
 /* >            the orthogonal matrix Q as a product of elementary */
 /* >            reflectors, and the elements above the first superdiagonal, */
 /* >            with the array TAUP, represent the orthogonal matrix P as */
 /* >            a product of elementary reflectors; */
 /* >          if m < n, the diagonal and the first subdiagonal are */
 /* >            overwritten with the lower bidiagonal matrix B; the */
 /* >            elements below the first subdiagonal, with the array TAUQ, */
 /* >            represent the orthogonal matrix Q as a product of */
 /* >            elementary reflectors, and the elements above the diagonal, */
 /* >            with the array TAUP, represent the orthogonal matrix P as */
 /* >            a product of elementary reflectors. */
 /* >          See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] D */
 /* > \verbatim */
 /* >          D is REAL array, dimension (min(M,N)) */
 /* >          The diagonal elements of the bidiagonal matrix B: */
 /* >          D(i) = A(i,i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] E */
 /* > \verbatim */
 /* >          E is REAL array, dimension (min(M,N)-1) */
 /* >          The off-diagonal elements of the bidiagonal matrix B: */
 /* >          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */
 /* >          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUQ */
 /* > \verbatim */
 /* >          TAUQ is REAL array, dimension (min(M,N)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the orthogonal matrix Q. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUP */
 /* > \verbatim */
 /* >          TAUP is REAL array, dimension (min(M,N)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the orthogonal matrix P. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (max(M,N)) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit. */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2017 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrices Q and P are represented as products of elementary */
 /* >  reflectors: */
 /* > */
 /* >  If m >= n, */
 /* > */
 /* >     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1) */
 /* > */
 /* >  Each H(i) and G(i) has the form: */
 /* > */
 /* >     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T */
 /* > */
 /* >  where tauq and taup are real scalars, and v and u are real vectors; */
 /* >  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */
 /* >  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */
 /* >  tauq is stored in TAUQ(i) and taup in TAUP(i). */
 /* > */
 /* >  If m < n, */
 /* > */
 /* >     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m) */
 /* > */
 /* >  Each H(i) and G(i) has the form: */
 /* > */
 /* >     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T */
 /* > */
 /* >  where tauq and taup are real scalars, and v and u are real vectors; */
 /* >  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */
 /* >  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */
 /* >  tauq is stored in TAUQ(i) and taup in TAUP(i). */
 /* > */
 /* >  The contents of A on exit are illustrated by the following examples: */
 /* > */
 /* >  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n): */
 /* > */
 /* >    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 ) */
 /* >    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 ) */
 /* >    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 ) */
 /* >    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 ) */
 /* >    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 ) */
 /* >    (  v1  v2  v3  v4  v5 ) */
 /* > */
 /* >  where d and e denote diagonal and off-diagonal elements of B, vi */
 /* >  denotes an element of the vector defining H(i), and ui an element of */
 /* >  the vector defining G(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int sgebd2_(integer *m, integer *n, real *a, integer *lda, 
 	real *d__, real *e, real *tauq, real *taup, real *work, integer *info)
 {
     /* Column stride of A and the shift that lets A be indexed with 1-based (row, column) pairs */
     integer a_dim1, a_offset;
     /* Integer temporaries handed by address to the callees */
     integer errarg, veclen, nrows, ncols, next;
     /* Reflector index and its upper bound, read once before the loop starts */
     integer k, last;
     /* Entry at which the current reflector is generated (diagonal or first off-diagonal) */
     real *pivot;
     extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, 
 	    integer *, real *, real *, integer *, real *);
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
     extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, 
 	    real *);

     /* Shift every array pointer so the code below can index from 1 */
     a_dim1 = *lda;
     a_offset = 1 + a_dim1 * 1;
     a -= a_offset;
     --d__;
     --e;
     --tauq;
     --taup;
     --work;

     /* Check M, N and LDA; the first offending argument is reported via XERBLA */
     *info = 0;
     if (*m < 0) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
     }
     if (*info < 0) {
 	errarg = -(*info);
 	xerbla_("SGEBD2", &errarg, (ftnlen)6);
 	return 0;
     }

     if (*m < *n) {

 /*        Fewer rows than columns: reduce to lower bidiagonal form */

 	last = *m;
 	for (k = 1; k <= last; ++k) {

 /*           G(k) annihilates A(k,k+1:n) */

 	    pivot = &a[k + k * a_dim1];
 	    veclen = *n - k + 1;
 	    next = k + 1;
 	    slarfg_(&veclen, pivot, &a[k + f2cmin(next,*n) * a_dim1], lda, 
 		    &taup[k]);
 	    d__[k] = *pivot;
 	    /* The reflector vector needs an explicit leading 1 while it is applied */
 	    *pivot = 1.f;

 /*           Apply G(k) to A(k+1:m,k:n) from the right */

 	    if (k < *m) {
 		nrows = *m - k;
 		ncols = *n - k + 1;
 		slarf_("Right", &nrows, &ncols, pivot, lda, &taup[k], 
 			&a[k + 1 + k * a_dim1], lda, &work[1]);
 	    }
 	    *pivot = d__[k];

 	    if (k >= *m) {
 		/* Last row: there is no H(k) */
 		tauq[k] = 0.f;
 		continue;
 	    }

 /*           H(k) annihilates A(k+2:m,k) */

 	    pivot = &a[k + 1 + k * a_dim1];
 	    veclen = *m - k;
 	    next = k + 2;
 	    slarfg_(&veclen, pivot, &a[f2cmin(next,*m) + k * a_dim1], &c__1, 
 		    &tauq[k]);
 	    e[k] = *pivot;
 	    *pivot = 1.f;

 /*           Apply H(k) to A(k+1:m,k+1:n) from the left */

 	    nrows = *m - k;
 	    ncols = *n - k;
 	    slarf_("Left", &nrows, &ncols, pivot, &c__1, &tauq[k], 
 		    &a[k + 1 + (k + 1) * a_dim1], lda, &work[1]);
 	    *pivot = e[k];
 	}
 	return 0;
     }

 /*     At least as many rows as columns: reduce to upper bidiagonal form */

     last = *n;
     for (k = 1; k <= last; ++k) {

 /*        H(k) annihilates A(k+1:m,k) */

 	pivot = &a[k + k * a_dim1];
 	veclen = *m - k + 1;
 	next = k + 1;
 	slarfg_(&veclen, pivot, &a[f2cmin(next,*m) + k * a_dim1], &c__1, 
 		&tauq[k]);
 	d__[k] = *pivot;
 	/* The reflector vector needs an explicit leading 1 while it is applied */
 	*pivot = 1.f;

 /*        Apply H(k) to A(k:m,k+1:n) from the left */

 	if (k < *n) {
 	    nrows = *m - k + 1;
 	    ncols = *n - k;
 	    slarf_("Left", &nrows, &ncols, pivot, &c__1, &tauq[k], 
 		    &a[k + (k + 1) * a_dim1], lda, &work[1]);
 	}
 	*pivot = d__[k];

 	if (k >= *n) {
 	    /* Last column: there is no G(k) */
 	    taup[k] = 0.f;
 	    continue;
 	}

 /*        G(k) annihilates A(k,k+2:n) */

 	pivot = &a[k + (k + 1) * a_dim1];
 	veclen = *n - k;
 	next = k + 2;
 	slarfg_(&veclen, pivot, &a[k + f2cmin(next,*n) * a_dim1], lda, 
 		&taup[k]);
 	e[k] = *pivot;
 	*pivot = 1.f;

 /*        Apply G(k) to A(k+1:m,k+1:n) from the right */

 	nrows = *m - k;
 	ncols = *n - k;
 	slarf_("Right", &nrows, &ncols, pivot, lda, &taup[k], 
 		&a[k + 1 + (k + 1) * a_dim1], lda, &work[1]);
 	*pivot = e[k];
     }
     return 0;

 /*     End of SGEBD2 */

 } /* sgebd2_ */

--- a/lapack-netlib/SRC/sgebrd.c
+++ b/lapack-netlib/SRC/sgebrd.c
@@ -0,0 +1,783 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Single-precision complex value held as two `real` fields: r (real part) and i (imaginary part). */
 typedef struct { real r, i; } complex;
 /* Double-precision complex value held as two `doublereal` fields: r (real part) and i (imaginary part). */
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd: build a C99 _Complex value from the r and i fields of the struct behind the pointer. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd: return the same address cast to a pointer to the matching _Complex type (no copy is made). */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf / pCd: dereference of the cast pointer, i.e. an lvalue the helper macros below assign through. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Statement macros: expand to a bare `break;` / `continue;` for the enclosing loop. */
 #define myexit_() break;
 #define mycycle() continue;
 /* Value macros: parenthesized (not brace-wrapped) so they are usable in any
    expression context, not only as an initializer.  myhuge ignores its argument. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 /* Forwards to dmaxloc_, dereferencing the start and end positions. */
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /*
  * Raise x to the integer power n by repeated squaring.
  * A negative n computes the |n|-th power of 1/x; n == 0 returns 1.
  */
 static float spow_ui(float x, integer n) {
 	float result = 1.0f;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer in signed arithmetic overflows, which is undefined. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		/* Multiply in the current square when the low exponent bit is set. */
 		if (bits & 01) result *= x;
 		/* Square only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /*
  * Raise x to the integer power n by repeated squaring.
  * A negative n computes the |n|-th power of 1/x; n == 0 returns 1.
  */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer in signed arithmetic overflows, which is undefined. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		/* Multiply in the current square when the low exponent bit is set. */
 		if (bits & 01) result *= x;
 		/* Square only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /*
  * Raise the complex value x to the integer power n by repeated squaring.
  * A negative n computes the |n|-th power of 1/x; n == 0 returns 1.
  */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0f;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer in signed arithmetic overflows, which is undefined. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		/* Multiply in the current square when the low exponent bit is set. */
 		if (bits & 01) result *= x;
 		/* Square only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /*
  * Raise the complex value x to the integer power n by repeated squaring.
  * A negative n computes the |n|-th power of 1/x; n == 0 returns 1.
  */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer in signed arithmetic overflows, which is undefined. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		/* Multiply in the current square when the low exponent bit is set. */
 		if (bits & 01) result *= x;
 		/* Square only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /*
  * Integer power x**n.
  * For n <= 0: n == 0 or x == 1 gives 1; x == -1 falls through to the general
  * loop with the exponent negated; any other x gives 0, except x == 0, which
  * evaluates 1/x (an integer division by zero).
  */
 static integer pow_ii(integer x, integer n) {
 	integer acc;
 	unsigned long int bits;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		n = -n;
 	}
 	/* Square-and-multiply over the bits of the exponent. */
 	bits = n;
 	acc = 1;
 	for (;;) {
 		if (bits & 01) acc *= x;
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return acc;
 }
 /*
  * Position of the largest element among w[s-1] .. w[e-1], counted from 1
  * relative to s.  Ties keep the earliest position (the comparison is strict).
  * The parameter n is not used.
  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double top = w[s-1];
 	integer best = s;
 	integer pos = s + 1;
 	while (pos <= e) {
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 		++pos;
 	}
 	return best - s + 1;
 }
 /*
  * Position of the largest element among w[s-1] .. w[e-1], counted from 1
  * relative to s.  Ties keep the earliest position (the comparison is strict).
  * The parameter n is not used.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float top = w[s-1];
 	integer best = s;
 	integer pos = s + 1;
 	while (pos <= e) {
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 		++pos;
 	}
 	return best - s + 1;
 }
 /*
  * Conjugated dot product: stores sum over k of conj(x[k*incx]) * y[k*incy],
  * k = 0 .. n-1, into *z.
  * NOTE: elements are addressed as k*inc from the base pointer, so a negative
  * increment indexes before x or y.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer count = *n_, sx = *incx_, sy = *incy_;
 	_Complex float acc = 0.0;
 	integer k;
 	if (sx != 1 || sy != 1) {
 		/* general strides */
 		for (k = 0; k < count; ++k)
 			acc += conjf(Cf(&x[k*sx])) * Cf(&y[k*sy]);
 	} else {
 		/* both vectors contiguous */
 		for (k = 0; k < count; ++k)
 			acc += conjf(Cf(&x[k])) * Cf(&y[k]);
 	}
 	pCf(z) = acc;
 }
 /*
  * Conjugated dot product: stores sum over k of conj(x[k*incx]) * y[k*incy],
  * k = 0 .. n-1, into *z.
  * NOTE: elements are addressed as k*inc from the base pointer, so a negative
  * increment indexes before x or y.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer count = *n_, sx = *incx_, sy = *incy_;
 	_Complex double acc = 0.0;
 	integer k;
 	if (sx != 1 || sy != 1) {
 		/* general strides */
 		for (k = 0; k < count; ++k)
 			acc += conj(Cd(&x[k*sx])) * Cd(&y[k*sy]);
 	} else {
 		/* both vectors contiguous */
 		for (k = 0; k < count; ++k)
 			acc += conj(Cd(&x[k])) * Cd(&y[k]);
 	}
 	pCd(z) = acc;
 }
 /*
  * Unconjugated single-precision complex dot product:
  *     *z = sum over k of x_k * y_k,   k = 0 .. *n_ - 1,
  * with x and y traversed using strides *incx_ and *incy_.
  * A negative stride walks its vector backwards starting at element
  * (n-1)*|inc|, as the reference BLAS does; plain x[i*incx] with a negative
  * incx would read in front of the array.  *n_ <= 0 stores zero in *z.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	integer ix, iy, k;
 	_Complex float acc = 0.0;
 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;	/* start at the far end for a negative stride */
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (k = 0; k < n; k++, ix += incx, iy += incy) {
 			acc += Cf(&x[ix]) * Cf(&y[iy]);	/* no conjugation in the "u" variant */
 		}
 	}
 	pCf(z) = acc;
 }
 /*
  * Unconjugated double-precision complex dot product:
  *     *z = sum over k of x_k * y_k,   k = 0 .. *n_ - 1,
  * with x and y traversed using strides *incx_ and *incy_.
  * A negative stride walks its vector backwards starting at element
  * (n-1)*|inc|, as the reference BLAS does; plain x[i*incx] with a negative
  * incx would read in front of the array.  *n_ <= 0 stores zero in *z.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	integer ix, iy, k;
 	_Complex double acc = 0.0;
 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;	/* start at the far end for a negative stride */
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (k = 0; k < n; k++, ix += incx, iy += incy) {
 			acc += Cd(&x[ix]) * Cd(&y[iy]);	/* no conjugation in the "u" variant */
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* Literal arguments of sgebrd_ kept at file scope because every callee takes its arguments by address. */
 static integer c__1 = 1;	/* first argument of the ilaenv_ query whose result becomes the block size nb */
 static integer c_n1 = -1;	/* the two trailing integer arguments of every ilaenv_ call */
 static integer c__3 = 3;	/* first argument of the ilaenv_ query whose result becomes the crossover point nx */
 static integer c__2 = 2;	/* first argument of the ilaenv_ query whose result becomes nbmin */
 static real c_b21 = -1.f;	/* scalar passed ahead of the first matrix in both sgemm_ updates (the subtraction) */
 static real c_b22 = 1.f;	/* scalar passed ahead of the updated matrix in both sgemm_ updates (accumulate into A) */

 /* > \brief \b SGEBRD */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEBRD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgebrd.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgebrd.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgebrd.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, */
 /*                          INFO ) */

 /*       INTEGER            INFO, LDA, LWORK, M, N */
 /*       REAL               A( LDA, * ), D( * ), E( * ), TAUP( * ), */
 /*      $                   TAUQ( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEBRD reduces a general real M-by-N matrix A to upper or lower */
 /* > bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */
 /* > */
 /* > If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows in the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns in the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N general matrix to be reduced. */
 /* >          On exit, */
 /* >          if m >= n, the diagonal and the first superdiagonal are */
 /* >            overwritten with the upper bidiagonal matrix B; the */
 /* >            elements below the diagonal, with the array TAUQ, represent */
 /* >            the orthogonal matrix Q as a product of elementary */
 /* >            reflectors, and the elements above the first superdiagonal, */
 /* >            with the array TAUP, represent the orthogonal matrix P as */
 /* >            a product of elementary reflectors; */
 /* >          if m < n, the diagonal and the first subdiagonal are */
 /* >            overwritten with the lower bidiagonal matrix B; the */
 /* >            elements below the first subdiagonal, with the array TAUQ, */
 /* >            represent the orthogonal matrix Q as a product of */
 /* >            elementary reflectors, and the elements above the diagonal, */
 /* >            with the array TAUP, represent the orthogonal matrix P as */
 /* >            a product of elementary reflectors. */
 /* >          See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] D */
 /* > \verbatim */
 /* >          D is REAL array, dimension (min(M,N)) */
 /* >          The diagonal elements of the bidiagonal matrix B: */
 /* >          D(i) = A(i,i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] E */
 /* > \verbatim */
 /* >          E is REAL array, dimension (min(M,N)-1) */
 /* >          The off-diagonal elements of the bidiagonal matrix B: */
 /* >          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */
 /* >          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUQ */
 /* > \verbatim */
 /* >          TAUQ is REAL array, dimension (min(M,N)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the orthogonal matrix Q. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUP */
 /* > \verbatim */
 /* >          TAUP is REAL array, dimension (min(M,N)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the orthogonal matrix P. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK.  LWORK >= max(1,M,N). */
 /* >          For optimum performance LWORK >= (M+N)*NB, where NB */
 /* >          is the optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrices Q and P are represented as products of elementary */
 /* >  reflectors: */
 /* > */
 /* >  If m >= n, */
 /* > */
 /* >     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1) */
 /* > */
 /* >  Each H(i) and G(i) has the form: */
 /* > */
 /* >     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T */
 /* > */
 /* >  where tauq and taup are real scalars, and v and u are real vectors; */
 /* >  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */
 /* >  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */
 /* >  tauq is stored in TAUQ(i) and taup in TAUP(i). */
 /* > */
 /* >  If m < n, */
 /* > */
 /* >     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m) */
 /* > */
 /* >  Each H(i) and G(i) has the form: */
 /* > */
 /* >     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T */
 /* > */
 /* >  where tauq and taup are real scalars, and v and u are real vectors; */
 /* >  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */
 /* >  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */
 /* >  tauq is stored in TAUQ(i) and taup in TAUP(i). */
 /* > */
 /* >  The contents of A on exit are illustrated by the following examples: */
 /* > */
 /* >  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n): */
 /* > */
 /* >    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 ) */
 /* >    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 ) */
 /* >    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 ) */
 /* >    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 ) */
 /* >    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 ) */
 /* >    (  v1  v2  v3  v4  v5 ) */
 /* > */
 /* >  where d and e denote diagonal and off-diagonal elements of B, vi */
 /* >  denotes an element of the vector defining H(i), and ui an element of */
 /* >  the vector defining G(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /*
  * sgebrd_: blocked reduction of the M-by-N matrix A to bidiagonal form.
  * Panels of nb rows/columns are reduced by slabrd_ and the trailing
  * submatrix is updated with two sgemm_ calls; the part left over after the
  * blocked loop is finished by the unblocked sgebd2_.
  * Every exit path returns 0.  Argument errors are reported through *info
  * and xerbla_.  A workspace query (*lwork == -1) returns right after the
  * argument checks with WORK(1) holding (M+N)*NB.
  */
 /* Subroutine */ int sgebrd_(integer *m, integer *n, real *a, integer *lda, 
 	real *d__, real *e, real *tauq, real *taup, real *work, integer *
 	lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;

    /* Local variables */
    integer i__, j, nbmin, iinfo;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
 	    integer *, real *, real *, integer *, real *, integer *, real *, 
 	    real *, integer *);
    integer minmn;
    extern /* Subroutine */ int sgebd2_(integer *, integer *, real *, integer 
 	    *, real *, real *, real *, real *, real *, integer *);
    integer nb, nx;
    extern /* Subroutine */ int slabrd_(integer *, integer *, integer *, real 
 	    *, integer *, real *, real *, real *, real *, real *, integer *, 
 	    real *, integer *);
    integer ws;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer ldwrkx, ldwrky, lwkopt;
    logical lquery;


 /*  -- LAPACK computational routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    /* Shift every array pointer so that the 1-based subscripts used below */
    /* (a[i + j*a_dim1], d__[i], work[1], ...) address the caller's storage. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --d__;
    --e;
    --tauq;
    --taup;
    --work;

    /* Function Body */
    *info = 0;
 /* Computing MAX */
    /* Block size: the ilaenv_ answer, but never less than 1. */
    i__1 = 1, i__2 = ilaenv_(&c__1, "SGEBRD", " ", m, n, &c_n1, &c_n1, (
 	    ftnlen)6, (ftnlen)1);
    nb = f2cmax(i__1,i__2);
    lwkopt = (*m + *n) * nb;
    /* WORK(1) is written before the arguments are validated, so it is set */
    /* even on the error and query exits below. */
    work[1] = (real) lwkopt;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	/* LWORK must be at least max(1,M,N) unless this is a workspace query. */
 	i__1 = f2cmax(1,*m);
 	if (*lwork < f2cmax(i__1,*n) && ! lquery) {
 	    *info = -10;
 	}
    }
    if (*info < 0) {
 	i__1 = -(*info);
 	xerbla_("SGEBRD", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    minmn = f2cmin(*m,*n);
    if (minmn == 0) {
 	work[1] = 1.f;
 	return 0;
    }

    /* Workspace figure reported on exit; raised to (M+N)*NB below only when */
    /* the blocked path is considered. */
    ws = f2cmax(*m,*n);
    /* Leading dimensions of the two panels kept in WORK (see the slabrd_ call). */
    ldwrkx = *m;
    ldwrky = *n;

    if (nb > 1 && nb < minmn) {

 /*        Set the crossover point NX. */

 /* Computing MAX */
 	i__1 = nb, i__2 = ilaenv_(&c__3, "SGEBRD", " ", m, n, &c_n1, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);

 /*        Determine when to switch from blocked to unblocked code. */

 	if (nx < minmn) {
 	    ws = (*m + *n) * nb;
 	    if (*lwork < ws) {

 /*              Not enough work space for the optimal NB, consider using */
 /*              a smaller block size. */

 		nbmin = ilaenv_(&c__2, "SGEBRD", " ", m, n, &c_n1, &c_n1, (
 			ftnlen)6, (ftnlen)1);
 		if (*lwork >= (*m + *n) * nbmin) {
 		    /* Largest block size that fits in the supplied workspace. */
 		    nb = *lwork / (*m + *n);
 		} else {
 		    /* nx = minmn makes the loop bound below zero, so the */
 		    /* blocked loop is skipped entirely. */
 		    nb = 1;
 		    nx = minmn;
 		}
 	    }
 	}
    } else {
 	/* Block size unusable: do all the work in the unblocked call. */
 	nx = minmn;
    }

    /* f2c rendering of a counted loop from 1 to minmn-nx in steps of nb; */
    /* the "i__2 < 0" arm is the generic handling of a negative step. */
    i__1 = minmn - nx;
    i__2 = nb;
    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {

 /*        Reduce rows and columns i:i+nb-1 to bidiagonal form and return */
 /*        the matrices X and Y which are needed to update the unreduced */
 /*        part of the matrix */

 	/* WORK holds two panels back to back: the first starts at work[1] */
 	/* with leading dimension ldwrkx, the second at work[ldwrkx*nb + 1] */
 	/* with leading dimension ldwrky. */
 	i__3 = *m - i__ + 1;
 	i__4 = *n - i__ + 1;
 	slabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[
 		i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx 
 		* nb + 1], &ldwrky);

 /*        Update the trailing submatrix A(i+nb:m,i+nb:n), using an update */
 /*        of the form  A := A - V*Y**T - X*U**T */

 	/* First product: rows i+nb.. of the panel columns of A times the */
 	/* transpose of the second workspace panel, skipping its first nb rows. */
 	i__3 = *m - i__ - nb + 1;
 	i__4 = *n - i__ - nb + 1;
 	sgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b21, &a[i__ 
 		+ nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], &
 		ldwrky, &c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda);
 	/* Second product: the first workspace panel (skipping its first nb */
 	/* rows) times columns i+nb.. of the panel rows of A. */
 	i__3 = *m - i__ - nb + 1;
 	i__4 = *n - i__ - nb + 1;
 	sgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b21, &
 		work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, &
 		c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda);

 /*        Copy diagonal and off-diagonal elements of B back into A */

 	if (*m >= *n) {
 	    /* Upper bidiagonal: e(j) goes on the superdiagonal. */
 	    i__3 = i__ + nb - 1;
 	    for (j = i__; j <= i__3; ++j) {
 		a[j + j * a_dim1] = d__[j];
 		a[j + (j + 1) * a_dim1] = e[j];
 /* L10: */
 	    }
 	} else {
 	    /* Lower bidiagonal: e(j) goes on the subdiagonal. */
 	    i__3 = i__ + nb - 1;
 	    for (j = i__; j <= i__3; ++j) {
 		a[j + j * a_dim1] = d__[j];
 		a[j + 1 + j * a_dim1] = e[j];
 /* L20: */
 	    }
 	}
 /* L30: */
    }

 /*     Use unblocked code to reduce the remainder of the matrix */

    /* i__ is now the first row/column the blocked loop did not process. */
    /* The status sgebd2_ stores in iinfo is not examined. */
    i__2 = *m - i__ + 1;
    i__1 = *n - i__ + 1;
    sgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &
 	    tauq[i__], &taup[i__], &work[1], &iinfo);
    work[1] = (real) ws;
    return 0;

 /*     End of SGEBRD */

 } /* sgebrd_ */

--- a/lapack-netlib/SRC/sgecon.c
+++ b/lapack-netlib/SRC/sgecon.c
@@ -0,0 +1,656 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar types used by the f2c-translated code in place of the Fortran ones. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers as a pair of members: r (real part) and i (imaginary part). */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd: build a C99 complex value from the r and i members of *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd: view the struct through a pointer to the C99 complex type (pointer cast only, nothing is copied). */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf / pCd: the same view dereferenced, so it can appear on the left of an assignment. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Values stored in a logical. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 /* Defaults to plain "extern" unless it was already defined before this point. */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer aliases used by the I/O descriptor structs and for the string-length arguments (ftnlen). */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*
  * cilist: descriptor for an external read or write.
  * Field meanings are defined by the f2c runtime and cannot be derived from
  * this file.
  * NOTE(review): no code visible in this part of the file refers to this
  * type; presumably it is kept because the header is a copy of f2c.h.
  */
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*
  * icilist: descriptor for an internal read or write.
  * Field meanings are defined by the f2c runtime and cannot be derived from
  * this file.
  * NOTE(review): no code visible in this part of the file refers to this
  * type; presumably it is kept because the header is a copy of f2c.h.
  */
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*
  * olist: descriptor for an open operation.
  * Field meanings are defined by the f2c runtime and cannot be derived from
  * this file.
  * NOTE(review): no code visible in this part of the file refers to this
  * type; presumably it is kept because the header is a copy of f2c.h.
  */
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*
  * cllist: descriptor for a close operation.
  * Field meanings are defined by the f2c runtime and cannot be derived from
  * this file.
  * NOTE(review): no code visible in this part of the file refers to this
  * type; presumably it is kept because the header is a copy of f2c.h.
  */
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*
  * alist: descriptor shared by the rewind, backspace and endfile operations.
  * Field meanings are defined by the f2c runtime and cannot be derived from
  * this file.
  * NOTE(review): no code visible in this part of the file refers to this
  * type; presumably it is kept because the header is a copy of f2c.h.
  */
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /*
  * inlist: descriptor for an inquire operation.
  * Most text results are a char* buffer paired with a length field.
  * informlen is declared ftnint while the other lengths are ftnlen; both
  * names are typedefs of int in this header, so the layout is unaffected.
  * NOTE(review): no code visible in this part of the file refers to this
  * type; presumably it is kept because the header is a copy of f2c.h.
  */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 /* Spelling of void used by f2c-generated declarations. */
 #define VOID void

 /* One storage cell that can hold any of the scalar or complex types above; */
 /* the member name selects the interpretation. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Description of a single variable: a name, an address, a dims pointer and a type code. */
 /* NOTE(review): the encoding of dims and type is defined by the f2c runtime, not by this file. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of variables: name, an array of Vardesc pointers, and the entry count nvars. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /*
  * Inline replacements for the libf2c runtime helpers, so the translated
  * routines need no -lf2c.  Most macros take POINTER arguments, matching
  * the by-reference calls that f2c emits.
  *
  * Fixed relative to the previous table:
  *   - myceiling, myhuge and mymaxloc expanded to a brace block
  *     ("{ceil(w)}"), which is neither an expression nor a complete
  *     statement; they now expand to parenthesised expressions.
  *   - pow_zz passed *(B), a doublecomplex struct, to cpow(); the exponent
  *     is now converted with Cd() like the base.
  */

 /* Generic arithmetic helpers (arguments may be evaluated more than once). */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Intrinsic functions; c_ and r_ are single precision, d_ and z_ double. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))

 /* Exponentiation; the *_ui helpers are the static functions defined further down in this file. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* Both operands are doublecomplex pointers, so both go through Cd(). */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}

 /* Character-string helpers and program termination. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}

 /* Loop control and a few value helpers. */
 #define myexit_() break;
 #define mycycle() continue;
 /* The next three expand to expressions so they can be used as values. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning logical whose parameters are left */
 /* unspecified - "(...)" when compiled as C++, an empty list when compiled as C. */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /*
  * Raise the single-precision x to the integer power n by binary
  * exponentiation (repeated squaring).  n == 0 yields 1; a negative n is
  * evaluated as (1/x)**|n|.  |n| is formed in unsigned arithmetic because
  * negating the most negative integer in signed arithmetic is undefined.
  */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		bits = 0UL - (unsigned long int) n;	/* |n| without signed overflow */
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 1UL) result *= x;	/* this bit of the exponent is set */
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;				/* next power-of-two factor */
 	}
 	return result;
 }
 /*
  * Raise the double-precision x to the integer power n by binary
  * exponentiation (repeated squaring).  n == 0 yields 1; a negative n is
  * evaluated as (1/x)**|n|.  |n| is formed in unsigned arithmetic because
  * negating the most negative integer in signed arithmetic is undefined.
  */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		bits = 0UL - (unsigned long int) n;	/* |n| without signed overflow */
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 1UL) result *= x;	/* this bit of the exponent is set */
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;				/* next power-of-two factor */
 	}
 	return result;
 }
 /*
  * Raise the single-precision complex x to the integer power n by binary
  * exponentiation (repeated squaring).  n == 0 yields 1; a negative n is
  * evaluated as (1/x)**|n|.  |n| is formed in unsigned arithmetic because
  * negating the most negative integer in signed arithmetic is undefined.
  */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		bits = 0UL - (unsigned long int) n;	/* |n| without signed overflow */
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 1UL) result *= x;	/* this bit of the exponent is set */
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;				/* next power-of-two factor */
 	}
 	return result;
 }
 /*
  * Raise the double-precision complex x to the integer power n by binary
  * exponentiation (repeated squaring).  n == 0 yields 1; a negative n is
  * evaluated as (1/x)**|n|.  |n| is formed in unsigned arithmetic because
  * negating the most negative integer in signed arithmetic is undefined.
  */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		bits = 0UL - (unsigned long int) n;	/* |n| without signed overflow */
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 1UL) result *= x;	/* this bit of the exponent is set */
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;				/* next power-of-two factor */
 	}
 	return result;
 }
 /*
  * Integer power x**n with integer (truncating) results.
  *   n == 0 or x == 1   -> 1
  *   n <  0, x == -1    -> (-1)**|n|, computed by the squaring loop
  *   n <  0, x == 0     -> evaluates 1/x
  *   n <  0, otherwise  -> 0
  *   n >  0             -> binary exponentiation
  * NOTE(review): the 1/x with x == 0 is an integer division by zero;
  * presumably kept on purpose so that 0**negative faults - confirm.
  */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int bits;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		n = -n;		/* only x == -1 reaches the loop with n < 0 */
 	}
 	bits = n;
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;
 	}
 	return result;
 }
 /*
  * Locate the largest entry of w(s:e), where s and e are 1-based inclusive
  * positions.  Returns the position of the first maximum counted from s
  * (1 means w(s) itself).  w(s) is read unconditionally; n is not used.
  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	const double *cur = w + (s - 1);	/* element at 1-based position s */
 	double best = *cur;
 	integer best_pos = s;
 	integer pos = s;
 	while (++pos <= e) {
 		++cur;
 		if (*cur > best) {	/* strict ">" keeps the earliest maximum */
 			best_pos = pos;
 			best = *cur;
 		}
 	}
 	return best_pos - s + 1;
 }
 /*
  * Single-precision variant: locate the largest entry of w(s:e), where s
  * and e are 1-based inclusive positions.  Returns the position of the
  * first maximum counted from s (1 means w(s) itself).  w(s) is read
  * unconditionally; n is not used.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	const float *cur = w + (s - 1);	/* element at 1-based position s */
 	float best = *cur;
 	integer best_pos = s;
 	integer pos = s;
 	while (++pos <= e) {
 		++cur;
 		if (*cur > best) {	/* strict ">" keeps the earliest maximum */
 			best_pos = pos;
 			best = *cur;
 		}
 	}
 	return best_pos - s + 1;
 }
 /*
  * Conjugated single-precision complex dot product:
  *     *z = sum over k of conj(x_k) * y_k,   k = 0 .. *n_ - 1,
  * with x and y traversed using strides *incx_ and *incy_.
  * A negative stride walks its vector backwards starting at element
  * (n-1)*|inc|, as the reference BLAS does; plain x[i*incx] with a negative
  * incx would read in front of the array.  *n_ <= 0 stores zero in *z.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	integer ix, iy, k;
 	_Complex float acc = 0.0;
 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;	/* start at the far end for a negative stride */
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (k = 0; k < n; k++, ix += incx, iy += incy) {
 			acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = acc;
 }
 /*
  * Conjugated double-precision complex dot product:
  *     *z = sum over k of conj(x_k) * y_k,   k = 0 .. *n_ - 1,
  * with x and y traversed using strides *incx_ and *incy_.
  * A negative stride walks its vector backwards starting at element
  * (n-1)*|inc|, as the reference BLAS does; plain x[i*incx] with a negative
  * incx would read in front of the array.  *n_ <= 0 stores zero in *z.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	integer ix, iy, k;
 	_Complex double acc = 0.0;
 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;	/* start at the far end for a negative stride */
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (k = 0; k < n; k++, ix += incx, iy += incy) {
 			acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = acc;
 }
 /*
  * Unconjugated single-precision complex dot product:
  *     *z = sum over k of x_k * y_k,   k = 0 .. *n_ - 1,
  * with x and y traversed using strides *incx_ and *incy_.
  * A negative stride walks its vector backwards starting at element
  * (n-1)*|inc|, as the reference BLAS does; plain x[i*incx] with a negative
  * incx would read in front of the array.  *n_ <= 0 stores zero in *z.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	integer ix, iy, k;
 	_Complex float acc = 0.0;
 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;	/* start at the far end for a negative stride */
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (k = 0; k < n; k++, ix += incx, iy += incy) {
 			acc += Cf(&x[ix]) * Cf(&y[iy]);	/* no conjugation in the "u" variant */
 		}
 	}
 	pCf(z) = acc;
 }
 /*
  * Unconjugated double-precision complex dot product:
  *     *z = sum over k of x_k * y_k,   k = 0 .. *n_ - 1,
  * with x and y traversed using strides *incx_ and *incy_.
  * A negative stride walks its vector backwards starting at element
  * (n-1)*|inc|, as the reference BLAS does; plain x[i*incx] with a negative
  * incx would read in front of the array.  *n_ <= 0 stores zero in *z.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	integer ix, iy, k;
 	_Complex double acc = 0.0;
 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;	/* start at the far end for a negative stride */
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (k = 0; k < n; k++, ix += incx, iy += incy) {
 			acc += Cd(&x[ix]) * Cd(&y[iy]);	/* no conjugation in the "u" variant */
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* Unit stride, passed by address to isamax_ and srscl_ in sgecon_. */
 static integer c__1 = 1;

 /* > \brief \b SGECON */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGECON + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgecon.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgecon.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgecon.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, */
 /*                          INFO ) */

 /*       CHARACTER          NORM */
 /*       INTEGER            INFO, LDA, N */
 /*       REAL               ANORM, RCOND */
 /*       INTEGER            IWORK( * ) */
 /*       REAL               A( LDA, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGECON estimates the reciprocal of the condition number of a general */
 /* > real matrix A, in either the 1-norm or the infinity-norm, using */
 /* > the LU factorization computed by SGETRF. */
 /* > */
 /* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */
 /* > condition number is computed as */
 /* >    RCOND = 1 / ( norm(A) * norm(inv(A)) ). */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] NORM */
 /* > \verbatim */
 /* >          NORM is CHARACTER*1 */
 /* >          Specifies whether the 1-norm condition number or the */
 /* >          infinity-norm condition number is required: */
 /* >          = '1' or 'O':  1-norm; */
 /* >          = 'I':         Infinity-norm. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          The factors L and U from the factorization A = P*L*U */
 /* >          as computed by SGETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ANORM */
 /* > \verbatim */
 /* >          ANORM is REAL */
 /* >          If NORM = '1' or 'O', the 1-norm of the original matrix A. */
 /* >          If NORM = 'I', the infinity-norm of the original matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is REAL */
 /* >          The reciprocal of the condition number of the matrix A, */
 /* >          computed as RCOND = 1/(norm(A) * norm(inv(A))). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (4*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /*  ===================================================================== */
 /*
  * sgecon_: estimate the reciprocal condition number of a matrix from its
  * LU factors held in A.  slacn2_ drives a reverse-communication loop; on
  * each request this routine applies inv(A) or inv(A**T) to the vector in
  * WORK(1:N) with two triangular solves (slatrs_).  *rcond stays 0 if the
  * estimate cannot be completed.  Every exit path returns 0; argument
  * errors are reported through *info and xerbla_.
  */
 /* Subroutine */ int sgecon_(char *norm, integer *n, real *a, integer *lda, 
 	real *anorm, real *rcond, real *work, integer *iwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1;
    real r__1;

    /* Local variables */
    integer kase, kase1;
    real scale;
    extern logical lsame_(char *, char *);
    integer isave[3];
    extern /* Subroutine */ int srscl_(integer *, real *, real *, integer *), 
 	    slacn2_(integer *, real *, real *, integer *, real *, integer *, 
 	    integer *);
    real sl;
    integer ix;
    extern real slamch_(char *);
    real su;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer isamax_(integer *, real *, integer *);
    real ainvnm;
    logical onenrm;
    char normin[1];
    extern /* Subroutine */ int slatrs_(char *, char *, char *, char *, 
 	    integer *, real *, integer *, real *, real *, real *, integer *);
    real smlnum;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
    /* Shift the array pointers so the 1-based subscripts below (work[1], */
    /* iwork[1], a[a_offset]) address the caller's storage. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;
    /* '1' is compared directly; "O" goes through lsame_. */
    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
    if (! onenrm && ! lsame_(norm, "I")) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
    } else if (*anorm < 0.f) {
 	*info = -5;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("SGECON", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible */

    /* RCOND defaults to 0 and is only overwritten for N == 0 or once the */
    /* estimate below has finished with a nonzero norm. */
    *rcond = 0.f;
    if (*n == 0) {
 	*rcond = 1.f;
 	return 0;
    } else if (*anorm == 0.f) {
 	return 0;
    }

    smlnum = slamch_("Safe minimum");

 /*     Estimate the norm of inv(A). */

    ainvnm = 0.f;
    /* 'N' on the first pass through the solves; switched to 'Y' after it. */
    *(unsigned char *)normin = 'N';
    /* kase1 is the slacn2_ request answered with inv(A); any other */
    /* nonzero request is answered with inv(A**T). */
    if (onenrm) {
 	kase1 = 1;
    } else {
 	kase1 = 2;
    }
    kase = 0;
 L10:
    /* Reverse communication: WORK(N+1:2N) and IWORK are scratch handed to */
    /* slacn2_, WORK(1:N) is the vector to be transformed, isave carries */
    /* slacn2_'s state between calls.  kase == 0 ends the iteration. */
    slacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave);
    if (kase != 0) {
 	/* WORK(2N+1:3N) is passed to the solves with L and WORK(3N+1:4N) to */
 	/* the solves with U; sl and su receive the scale factors. */
 	if (kase == kase1) {

 /*           Multiply by inv(L). */

 	    slatrs_("Lower", "No transpose", "Unit", normin, n, &a[a_offset], 
 		    lda, &work[1], &sl, &work[(*n << 1) + 1], info);

 /*           Multiply by inv(U). */

 	    slatrs_("Upper", "No transpose", "Non-unit", normin, n, &a[
 		    a_offset], lda, &work[1], &su, &work[*n * 3 + 1], info);
 	} else {

 /*           Multiply by inv(U**T). */

 	    slatrs_("Upper", "Transpose", "Non-unit", normin, n, &a[a_offset],
 		     lda, &work[1], &su, &work[*n * 3 + 1], info);

 /*           Multiply by inv(L**T). */

 	    slatrs_("Lower", "Transpose", "Unit", normin, n, &a[a_offset], 
 		    lda, &work[1], &sl, &work[(*n << 1) + 1], info);
 	}

 /*        Divide X by 1/(SL*SU) if doing so will not cause overflow. */

 	scale = sl * su;
 	*(unsigned char *)normin = 'Y';
 	if (scale != 1.f) {
 	    /* ix is the index isamax_ returns for WORK(1:N); the rescale is */
 	    /* skipped, leaving RCOND at 0, when scale is zero or too small */
 	    /* relative to |WORK(ix)|. */
 	    ix = isamax_(n, &work[1], &c__1);
 	    if (scale < (r__1 = work[ix], abs(r__1)) * smlnum || scale == 0.f)
 		     {
 		goto L20;
 	    }
 	    srscl_(n, &scale, &work[1], &c__1);
 	}
 	goto L10;
    }

 /*     Compute the estimate of the reciprocal condition number. */

    if (ainvnm != 0.f) {
 	*rcond = 1.f / ainvnm / *anorm;
    }

 L20:
    return 0;

 /*     End of SGECON */

 } /* sgecon_ */

--- a/lapack-netlib/SRC/sgeequ.c
+++ b/lapack-netlib/SRC/sgeequ.c
@@ -0,0 +1,733 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Raise x to the integer power n by repeated squaring.
    n == 0 yields 1 for every x; a negative n inverts x first and then
    raises the reciprocal to |n|. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0f;	/* not called "pow": that name would shadow pow() from <math.h> */
 	unsigned long int u;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic; negating n as a signed value
 		   overflows when n is the most negative integer. */
 		u = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Raise x to the integer power n by repeated squaring (double precision).
    n == 0 yields 1 for every x; a negative n inverts x first and then
    raises the reciprocal to |n|. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;	/* not called "pow": that name would shadow pow() from <math.h> */
 	unsigned long int u;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic; negating n as a signed value
 		   overflows when n is the most negative integer. */
 		u = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Raise the complex value x to the integer power n by repeated squaring.
    n == 0 yields 1 for every x; a negative n inverts x first and then
    raises the reciprocal to |n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0f;	/* not called "pow": that name would shadow pow() from <math.h> */
 	unsigned long int u;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic; negating n as a signed value
 		   overflows when n is the most negative integer. */
 		u = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Raise the double-complex value x to the integer power n by repeated
    squaring.  n == 0 yields 1 for every x; a negative n inverts x first and
    then raises the reciprocal to |n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;	/* not called "pow": that name would shadow pow() from <math.h> */
 	unsigned long int u;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic; negating n as a signed value
 		   overflows when n is the most negative integer. */
 		u = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Integer power x**n with integer (truncating) results.
      n == 0 or x == 1      -> 1
      n < 0 and x == -1     -> +1 or -1 according to the parity of n
      n < 0 and |x| >= 2    -> 0
      n < 0 and x == 0      -> evaluates 1/x, i.e. an integer division by
                               zero, exactly as the code did before
      n > 0                 -> x multiplied out by repeated squaring */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;	/* not called "pow": that name would shadow pow() from <math.h> */
 	unsigned long int u;
 	if (n == 0 || x == 1) return 1;
 	if (n < 0) {
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* x == -1: take |n| in unsigned arithmetic; negating n as a
 		   signed value overflows when n is the most negative integer. */
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Scan w[s-1] .. w[e-1] and return the position of the first largest
    element, counted so that element s is position 1.  w[s-1] is always read,
    even when e < s.  The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k = s + 1;
 	double top = w[s-1];
 	while (k <= e) {
 		/* strict '>' keeps the earliest of equal maxima */
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Scan w[s-1] .. w[e-1] and return the position of the first largest
    element, counted so that element s is position 1 (single precision).
    w[s-1] is always read, even when e < s.  The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k = s + 1;
 	float top = w[s-1];
 	while (k <= e) {
 		/* strict '>' keeps the earliest of equal maxima */
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: stores sum over k of conj(x_k) * y_k in *z,
    taking *n_ elements from x and y with element strides *incx_ and *incy_.
    A negative stride walks its vector backwards starting from the far end,
    as the reference BLAS does; indexing x[i*incx] directly would read in
    front of the array in that case.  Nothing is read when *n_ <= 0 and *z
    is then set to zero. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + conjg(x(i)) * y(i) */
 			acc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		/* first element visited: index 0, or the last used slot for a negative stride */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* acc = acc + conjg(x(ix)) * y(iy) */
 			acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product (double complex): stores sum over k of
    conj(x_k) * y_k in *z, taking *n_ elements from x and y with element
    strides *incx_ and *incy_.  A negative stride walks its vector backwards
    starting from the far end, as the reference BLAS does; indexing
    x[i*incx] directly would read in front of the array in that case.
    Nothing is read when *n_ <= 0 and *z is then set to zero. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + dconjg(x(i)) * y(i) */
 			acc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		/* first element visited: index 0, or the last used slot for a negative stride */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* acc = acc + dconjg(x(ix)) * y(iy) */
 			acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = acc;
 }	
 /* Unconjugated dot product: stores sum over k of x_k * y_k in *z, taking
    *n_ elements from x and y with element strides *incx_ and *incy_.
    A negative stride walks its vector backwards starting from the far end,
    as the reference BLAS does; indexing x[i*incx] directly would read in
    front of the array in that case.  Nothing is read when *n_ <= 0 and *z
    is then set to zero. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + x(i) * y(i), no conjugation */
 			acc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		/* first element visited: index 0, or the last used slot for a negative stride */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* acc = acc + x(ix) * y(iy), no conjugation */
 			acc += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product (double complex): stores sum over k of
    x_k * y_k in *z, taking *n_ elements from x and y with element strides
    *incx_ and *incy_.  A negative stride walks its vector backwards starting
    from the far end, as the reference BLAS does; indexing x[i*incx]
    directly would read in front of the array in that case.  Nothing is read
    when *n_ <= 0 and *z is then set to zero. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + x(i) * y(i), no conjugation */
 			acc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		/* first element visited: index 0, or the last used slot for a negative stride */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* acc = acc + x(ix) * y(iy), no conjugation */
 			acc += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SGEEQU */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEEQU + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeequ.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeequ.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeequ.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, */
 /*                          INFO ) */

 /*       INTEGER            INFO, LDA, M, N */
 /*       REAL               AMAX, COLCND, ROWCND */
 /*       REAL               A( LDA, * ), C( * ), R( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEEQU computes row and column scalings intended to equilibrate an */
 /* > M-by-N matrix A and reduce its condition number.  R returns the row */
 /* > scale factors and C the column scale factors, chosen to try to make */
 /* > the largest element in each row and column of the matrix B with */
 /* > elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. */
 /* > */
 /* > R(i) and C(j) are restricted to be between SMLNUM = smallest safe */
 /* > number and BIGNUM = largest safe number.  Use of these scaling */
 /* > factors is not guaranteed to reduce the condition number of A but */
 /* > works well in practice. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          The M-by-N matrix whose equilibration factors are */
 /* >          to be computed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] R */
 /* > \verbatim */
 /* >          R is REAL array, dimension (M) */
 /* >          If INFO = 0 or INFO > M, R contains the row scale factors */
 /* >          for A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] C */
 /* > \verbatim */
 /* >          C is REAL array, dimension (N) */
 /* >          If INFO = 0,  C contains the column scale factors for A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] ROWCND */
 /* > \verbatim */
 /* >          ROWCND is REAL */
 /* >          If INFO = 0 or INFO > M, ROWCND contains the ratio of the */
 /* >          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and */
 /* >          AMAX is neither too large nor too small, it is not worth */
 /* >          scaling by R. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] COLCND */
 /* > \verbatim */
 /* >          COLCND is REAL */
 /* >          If INFO = 0, COLCND contains the ratio of the smallest */
 /* >          C(i) to the largest C(i).  If COLCND >= 0.1, it is not */
 /* >          worth scaling by C. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] AMAX */
 /* > \verbatim */
 /* >          AMAX is REAL */
 /* >          Absolute value of largest matrix element.  If AMAX is very */
 /* >          close to overflow or very close to underflow, the matrix */
 /* >          should be scaled. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i,  and i is */
 /* >                <= M:  the i-th row of A is exactly zero */
 /* >                >  M:  the (i-M)-th column of A is exactly zero */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int sgeequ_(integer *m, integer *n, real *a, integer *lda, 
 	real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer 
 	*info)
 {
     /* External routines */
     extern real slamch_(char *);
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);

     /* A is stored column by column; consecutive columns are *lda elements */
     /* apart.  All indexing below is zero-based. */
     const integer ld = *lda;
     const real *column;
     integer nrows, ncols, row, col, bad_arg;
     real smlnum, bignum, lo, hi, mag, cur, bounded;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*     Computes row scale factors R and column scale factors C for the */
 /*     M-by-N matrix A, together with ROWCND, COLCND and AMAX. */

 /*  ===================================================================== */


 /*     Validate the arguments; INFO = -k reports that argument k is bad. */

     *info = 0;
     if (*m < 0) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
     }
     if (*info != 0) {
 	bad_arg = -(*info);
 	xerbla_("SGEEQU", &bad_arg, (ftnlen)6);
 	return 0;
     }

 /*     An empty matrix needs no scaling. */

     if (*m == 0 || *n == 0) {
 	*rowcnd = 1.f;
 	*colcnd = 1.f;
 	*amax = 0.f;
 	return 0;
     }

 /*     SMLNUM is the value returned by slamch_("S"); BIGNUM = 1/SMLNUM. */

     smlnum = slamch_("S");
     bignum = 1.f / smlnum;
     nrows = *m;
     ncols = *n;

 /*     Row pass: R(i) becomes the largest absolute value in row i. */

     for (row = 0; row < nrows; ++row) {
 	r__[row] = 0.f;
     }
     for (col = 0; col < ncols; ++col) {
 	column = a + col * ld;
 	for (row = 0; row < nrows; ++row) {
 	    mag = column[row];
 	    mag = abs(mag);
 	    cur = r__[row];
 	    r__[row] = f2cmax(cur,mag);
 	}
     }

 /*     Smallest and largest row maxima; the largest is also AMAX. */

     lo = bignum;
     hi = 0.f;
     for (row = 0; row < nrows; ++row) {
 	cur = r__[row];
 	hi = f2cmax(hi,cur);
 	lo = f2cmin(lo,cur);
     }
     *amax = hi;

     if (lo == 0.f) {

 /*        A row maximum is zero: report the first such row (1-based). */

 	for (row = 0; row < nrows; ++row) {
 	    if (r__[row] == 0.f) {
 		*info = row + 1;
 		return 0;
 	    }
 	}
     } else {

 /*        R(i) = 1 / min( max( R(i), SMLNUM ), BIGNUM ) */

 	for (row = 0; row < nrows; ++row) {
 	    cur = r__[row];
 	    bounded = f2cmax(cur,smlnum);
 	    r__[row] = 1.f / f2cmin(bounded,bignum);
 	}

 /*        ROWCND = max( smallest, SMLNUM ) / min( largest, BIGNUM ) */

 	*rowcnd = f2cmax(lo,smlnum) / f2cmin(hi,bignum);
     }

 /*     Column pass: C(j) becomes the largest |A(i,j)| * R(i) in column j, */
 /*     i.e. the column maximum after the row scaling computed above. */

     for (col = 0; col < ncols; ++col) {
 	c__[col] = 0.f;
     }
     for (col = 0; col < ncols; ++col) {
 	column = a + col * ld;
 	for (row = 0; row < nrows; ++row) {
 	    mag = column[row];
 	    mag = abs(mag) * r__[row];
 	    cur = c__[col];
 	    c__[col] = f2cmax(cur,mag);
 	}
     }

 /*     Smallest and largest column maxima. */

     lo = bignum;
     hi = 0.f;
     for (col = 0; col < ncols; ++col) {
 	cur = c__[col];
 	lo = f2cmin(lo,cur);
 	hi = f2cmax(hi,cur);
     }

     if (lo == 0.f) {

 /*        A column maximum is zero: report M + (1-based column index). */

 	for (col = 0; col < ncols; ++col) {
 	    if (c__[col] == 0.f) {
 		*info = nrows + col + 1;
 		return 0;
 	    }
 	}
     } else {

 /*        C(j) = 1 / min( max( C(j), SMLNUM ), BIGNUM ) */

 	for (col = 0; col < ncols; ++col) {
 	    cur = c__[col];
 	    bounded = f2cmax(cur,smlnum);
 	    c__[col] = 1.f / f2cmin(bounded,bignum);
 	}

 /*        COLCND = max( smallest, SMLNUM ) / min( largest, BIGNUM ) */

 	*colcnd = f2cmax(lo,smlnum) / f2cmin(hi,bignum);
     }

     return 0;

 /*     End of SGEEQU */

 } /* sgeequ_ */

--- a/lapack-netlib/SRC/sgeequb.c
+++ b/lapack-netlib/SRC/sgeequb.c
@@ -0,0 +1,753 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Raise x to the integer power n by repeated squaring.
    n == 0 yields 1 for every x; a negative n inverts x first and then
    raises the reciprocal to |n|. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0f;	/* not called "pow": that name would shadow pow() from <math.h> */
 	unsigned long int u;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic; negating n as a signed value
 		   overflows when n is the most negative integer. */
 		u = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Raise x to the integer power n by repeated squaring (double precision).
    n == 0 yields 1 for every x; a negative n inverts x first and then
    raises the reciprocal to |n|. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;	/* not called "pow": that name would shadow pow() from <math.h> */
 	unsigned long int u;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic; negating n as a signed value
 		   overflows when n is the most negative integer. */
 		u = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Raise the complex value x to the integer power n by repeated squaring.
    n == 0 yields 1 for every x; a negative n inverts x first and then
    raises the reciprocal to |n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0f;	/* not called "pow": that name would shadow pow() from <math.h> */
 	unsigned long int u;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic; negating n as a signed value
 		   overflows when n is the most negative integer. */
 		u = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Raise the double-complex value x to the integer power n by repeated
    squaring.  n == 0 yields 1 for every x; a negative n inverts x first and
    then raises the reciprocal to |n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;	/* not called "pow": that name would shadow pow() from <math.h> */
 	unsigned long int u;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic; negating n as a signed value
 		   overflows when n is the most negative integer. */
 		u = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Integer power x**n with integer (truncating) results.
      n == 0 or x == 1      -> 1
      n < 0 and x == -1     -> +1 or -1 according to the parity of n
      n < 0 and |x| >= 2    -> 0
      n < 0 and x == 0      -> evaluates 1/x, i.e. an integer division by
                               zero, exactly as the code did before
      n > 0                 -> x multiplied out by repeated squaring */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;	/* not called "pow": that name would shadow pow() from <math.h> */
 	unsigned long int u;
 	if (n == 0 || x == 1) return 1;
 	if (n < 0) {
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* x == -1: take |n| in unsigned arithmetic; negating n as a
 		   signed value overflows when n is the most negative integer. */
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Scan w[s-1] .. w[e-1] and return the position of the first largest
    element, counted so that element s is position 1.  w[s-1] is always read,
    even when e < s.  The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k = s + 1;
 	double top = w[s-1];
 	while (k <= e) {
 		/* strict '>' keeps the earliest of equal maxima */
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Scan w[s-1] .. w[e-1] and return the position of the first largest
    element, counted so that element s is position 1 (single precision).
    w[s-1] is always read, even when e < s.  The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k = s + 1;
 	float top = w[s-1];
 	while (k <= e) {
 		/* strict '>' keeps the earliest of equal maxima */
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: stores sum over k of conj(x_k) * y_k in *z,
    taking *n_ elements from x and y with element strides *incx_ and *incy_.
    A negative stride walks its vector backwards starting from the far end,
    as the reference BLAS does; indexing x[i*incx] directly would read in
    front of the array in that case.  Nothing is read when *n_ <= 0 and *z
    is then set to zero. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + conjg(x(i)) * y(i) */
 			acc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		/* first element visited: index 0, or the last used slot for a negative stride */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* acc = acc + conjg(x(ix)) * y(iy) */
 			acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product (double complex): stores sum over k of
    conj(x_k) * y_k in *z, taking *n_ elements from x and y with element
    strides *incx_ and *incy_.  A negative stride walks its vector backwards
    starting from the far end, as the reference BLAS does; indexing
    x[i*incx] directly would read in front of the array in that case.
    Nothing is read when *n_ <= 0 and *z is then set to zero. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + dconjg(x(i)) * y(i) */
 			acc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		/* first element visited: index 0, or the last used slot for a negative stride */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* acc = acc + dconjg(x(ix)) * y(iy) */
 			acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = acc;
 }	
 /* Unconjugated dot product: stores sum over k of x_k * y_k in *z, taking
    *n_ elements from x and y with element strides *incx_ and *incy_.
    A negative stride walks its vector backwards starting from the far end,
    as the reference BLAS does; indexing x[i*incx] directly would read in
    front of the array in that case.  Nothing is read when *n_ <= 0 and *z
    is then set to zero. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + x(i) * y(i), no conjugation */
 			acc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		/* first element visited: index 0, or the last used slot for a negative stride */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* acc = acc + x(ix) * y(iy), no conjugation */
 			acc += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product (double complex): stores sum over k of
    x_k * y_k in *z, taking *n_ elements from x and y with element strides
    *incx_ and *incy_.  A negative stride walks its vector backwards starting
    from the far end, as the reference BLAS does; indexing x[i*incx]
    directly would read in front of the array in that case.  Nothing is read
    when *n_ <= 0 and *z is then set to zero. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + x(i) * y(i), no conjugation */
 			acc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		/* first element visited: index 0, or the last used slot for a negative stride */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* acc = acc + x(ix) * y(iy), no conjugation */
 			acc += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SGEEQUB */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEEQUB + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeequb.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeequb.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeequb.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEEQUB( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, */
 /*                           INFO ) */

 /*       INTEGER            INFO, LDA, M, N */
 /*       REAL               AMAX, COLCND, ROWCND */
 /*       REAL               A( LDA, * ), C( * ), R( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEEQUB computes row and column scalings intended to equilibrate an */
 /* > M-by-N matrix A and reduce its condition number.  R returns the row */
 /* > scale factors and C the column scale factors, chosen to try to make */
 /* > the largest element in each row and column of the matrix B with */
 /* > elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most */
 /* > the radix. */
 /* > */
 /* > R(i) and C(j) are restricted to be a power of the radix between */
 /* > SMLNUM = smallest safe number and BIGNUM = largest safe number.  Use */
 /* > of these scaling factors is not guaranteed to reduce the condition */
 /* > number of A but works well in practice. */
 /* > */
 /* > This routine differs from SGEEQU by restricting the scaling factors */
 /* > to a power of the radix.  Barring over- and underflow, scaling by */
 /* > these factors introduces no additional rounding errors.  However, the */
 /* > scaled entries' magnitudes are no longer approximately 1 but lie */
 /* > between sqrt(radix) and 1/sqrt(radix). */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          The M-by-N matrix whose equilibration factors are */
 /* >          to be computed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] R */
 /* > \verbatim */
 /* >          R is REAL array, dimension (M) */
 /* >          If INFO = 0 or INFO > M, R contains the row scale factors */
 /* >          for A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] C */
 /* > \verbatim */
 /* >          C is REAL array, dimension (N) */
 /* >          If INFO = 0,  C contains the column scale factors for A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] ROWCND */
 /* > \verbatim */
 /* >          ROWCND is REAL */
 /* >          If INFO = 0 or INFO > M, ROWCND contains the ratio of the */
 /* >          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and */
 /* >          AMAX is neither too large nor too small, it is not worth */
 /* >          scaling by R. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] COLCND */
 /* > \verbatim */
 /* >          COLCND is REAL */
 /* >          If INFO = 0, COLCND contains the ratio of the smallest */
 /* >          C(i) to the largest C(i).  If COLCND >= 0.1, it is not */
 /* >          worth scaling by C. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] AMAX */
 /* > \verbatim */
 /* >          AMAX is REAL */
 /* >          Absolute value of largest matrix element.  If AMAX is very */
 /* >          close to overflow or very close to underflow, the matrix */
 /* >          should be scaled. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i,  and i is */
 /* >                <= M:  the i-th row of A is exactly zero */
 /* >                >  M:  the (i-M)-th column of A is exactly zero */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int sgeequb_(integer *m, integer *n, real *a, integer *lda, 
 	real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer 
 	*info)
 {
     extern real slamch_(char *);
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);

     /* Column stride of A.  A, R and C are addressed 0-based throughout, */
     /* so element (row,col) of A lives at a[row + col * ld]. */
     const integer ld = *lda;
     integer nrows, ncols, row, col, expo, argpos;
     real safmin, safmax, base, logbase;
     real lo, hi, cur, elem, mag, floored;

 /*     Validate the arguments; xerbla_ is told the offending position. */

     *info = 0;
     if (*m < 0) {
         *info = -1;
     } else if (*n < 0) {
         *info = -2;
     } else if (*lda < f2cmax(1,*m)) {
         *info = -4;
     }
     if (*info != 0) {
         argpos = -(*info);
         xerbla_("SGEEQUB", &argpos, (ftnlen)7);
         return 0;
     }

 /*     An empty matrix needs no scaling. */

     nrows = *m;
     ncols = *n;
     if (nrows == 0 || ncols == 0) {
         *rowcnd = 1.f;
         *colcnd = 1.f;
         *amax = 0.f;
         return 0;
     }

 /*     Machine constants.  SMLNUM is assumed to be a power of the radix. */

     safmin = slamch_("S");
     safmax = 1.f / safmin;
     base = slamch_("B");
     logbase = log(base);

 /*     Row pass: r__[row] becomes the largest |A(row,col)| over all columns. */

     for (row = 0; row < nrows; ++row) {
         r__[row] = 0.f;
     }
     for (col = 0; col < ncols; ++col) {
         const real *acol = a + col * ld;
         for (row = 0; row < nrows; ++row) {
             cur = r__[row];
             elem = acol[row];
             mag = abs(elem);
             r__[row] = f2cmax(cur,mag);
         }
     }

 /*     Replace each non-zero row maximum by a power of the radix; the */
 /*     exponent is log(max)/log(radix) truncated to an integer. */

     for (row = 0; row < nrows; ++row) {
         if (r__[row] > 0.f) {
             expo = (integer) (log(r__[row]) / logbase);
             r__[row] = pow_ri(&base, &expo);
         }
     }

 /*     Largest and smallest row factor. */

     lo = safmax;
     hi = 0.f;
     for (row = 0; row < nrows; ++row) {
         cur = r__[row];
         hi = f2cmax(hi,cur);
         lo = f2cmin(lo,cur);
     }
     *amax = hi;

     if (lo == 0.f) {

 /*        Some row is exactly zero: report the first one (1-based). */

         for (row = 0; row < nrows; ++row) {
             if (r__[row] == 0.f) {
                 *info = row + 1;
                 return 0;
             }
         }
     } else {

 /*        Invert the row factors, clamped to [SMLNUM, BIGNUM]. */

         for (row = 0; row < nrows; ++row) {
             cur = r__[row];
             floored = f2cmax(cur,safmin);
             r__[row] = 1.f / f2cmin(floored,safmax);
         }

 /*        ROWCND = min(R(I)) / max(R(I)), using the pre-inversion extremes. */

         *rowcnd = f2cmax(lo,safmin) / f2cmin(hi,safmax);
     }

 /*     Column pass: largest |A(row,col)| * R(row) in each column, then */
 /*     replaced by a power of the radix exactly as for the rows. */

     for (col = 0; col < ncols; ++col) {
         c__[col] = 0.f;
     }
     for (col = 0; col < ncols; ++col) {
         const real *acol = a + col * ld;
         for (row = 0; row < nrows; ++row) {
             cur = c__[col];
             elem = acol[row];
             mag = abs(elem) * r__[row];
             c__[col] = f2cmax(cur,mag);
         }
         if (c__[col] > 0.f) {
             expo = (integer) (log(c__[col]) / logbase);
             c__[col] = pow_ri(&base, &expo);
         }
     }

 /*     Largest and smallest column factor. */

     lo = safmax;
     hi = 0.f;
     for (col = 0; col < ncols; ++col) {
         cur = c__[col];
         lo = f2cmin(lo,cur);
         hi = f2cmax(hi,cur);
     }

     if (lo == 0.f) {

 /*        Some column is exactly zero: report M + its 1-based index. */

         for (col = 0; col < ncols; ++col) {
             if (c__[col] == 0.f) {
                 *info = nrows + col + 1;
                 return 0;
             }
         }
     } else {

 /*        Invert the column factors, clamped to [SMLNUM, BIGNUM]. */

         for (col = 0; col < ncols; ++col) {
             cur = c__[col];
             floored = f2cmax(cur,safmin);
             c__[col] = 1.f / f2cmin(floored,safmax);
         }

 /*        COLCND = min(C(J)) / max(C(J)), using the pre-inversion extremes. */

         *colcnd = f2cmax(lo,safmin) / f2cmin(hi,safmax);
     }

     return 0;

 /*     End of SGEEQUB */

 } /* sgeequb_ */

--- a/lapack-netlib/SRC/sgees.c
+++ b/lapack-netlib/SRC/sgees.c
--- a/lapack-netlib/SRC/sgeesx.c
+++ b/lapack-netlib/SRC/sgeesx.c
--- a/lapack-netlib/SRC/sgeev.c
+++ b/lapack-netlib/SRC/sgeev.c
--- a/lapack-netlib/SRC/sgeevx.c
+++ b/lapack-netlib/SRC/sgeevx.c
--- a/lapack-netlib/SRC/sgehd2.c
+++ b/lapack-netlib/SRC/sgehd2.c
@@ -0,0 +1,627 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* cilist: parameter block for external read/write statements.
    NOTE(review): nothing in this file uses the struct; the field roles
    below are inferred from their names -- confirm against libf2c:
      cierr  presumably an error-handling flag
      ciunit presumably the unit number
      ciend  presumably an end-of-file handling flag
      cifmt  presumably the format string
      cirec  presumably the record number */
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /* icilist: parameter block for internal read/write statements.
    NOTE(review): nothing in this file uses the struct; the field roles
    are inferred from their names -- confirm against libf2c.  Unlike
    cilist, the "unit" here is a char pointer (iciunit), accompanied by
    two integer fields icirlen and icirnum (presumably record length and
    record count). */
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /* olist: parameter block for the open statement.
    NOTE(review): nothing in this file uses the struct; the field roles
    are inferred from their names -- confirm against libf2c.  ofnm/ofnmlen
    look like a file name and its length; osta, oacc, ofm and oblnk are
    presumably the STATUS, ACCESS, FORM and BLANK specifiers; orl is
    presumably the record length. */
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /* cllist: parameter block for the close statement.
    NOTE(review): unused in this file; cerr/cunit/csta are presumably the
    error flag, unit number and STATUS specifier -- confirm against libf2c. */
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /* alist: parameter block shared by rewind, backspace and endfile.
    NOTE(review): unused in this file; aerr/aunit are presumably the error
    flag and unit number -- confirm against libf2c. */
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inlist: parameter block for the inquire statement.
    The fields from inex onward follow "the standard's order" (see the
    inline comment).  Each character result is a char pointer paired with
    a length field; each integer/logical result is an ftnint pointer.
    NOTE(review): unused in this file; individual field meanings are
    inferred from their names -- confirm against libf2c.  Note that
    informlen is declared ftnint while the other lengths are ftnlen;
    both are typedefs of int in this header. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* Multitype: one storage cell that can hold any of the f2c scalar types
    declared above (integer1, shortint, integer, real, doublereal, complex,
    doublecomplex).  The longint member is commented out. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Vardesc: descriptor of one variable, referenced from struct Namelist.
    NOTE(review): unused in this file; name/addr/dims/type are presumably
    the variable's name, storage address, dimension information and a type
    code -- confirm against libf2c. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* Namelist: a named group of variables: `vars` points to Vardesc
    pointers and `nvars` is an int stored beside it.
    NOTE(review): unused in this file; nvars is presumably the number of
    entries in vars -- confirm against libf2c. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* spow_ui: x raised to the integer power n by repeated squaring.
  *   n == 0 gives 1; n < 0 is evaluated as (1/x) ** |n|.
  * |n| is formed in unsigned arithmetic, so the most negative integer no
  * longer overflows on negation.  The accumulator is not called `pow`, to
  * avoid shadowing the math.h function. */
 static float spow_ui(float x, integer n) {
 	float result=1.0; unsigned long int bits;
 	if(n != 0) {
 		bits = n;
 		if(n < 0) bits = 0UL - bits, x = 1/x;
 		for( ; ; ) {
 			if(bits & 01) result *= x;
 			if(bits >>= 1) x *= x;
 			else break;
 		}
 	}
 	return result;
 }
 /* dpow_ui: x raised to the integer power n by repeated squaring.
  *   n == 0 gives 1; n < 0 is evaluated as (1/x) ** |n|.
  * |n| is formed in unsigned arithmetic, so the most negative integer no
  * longer overflows on negation.  The accumulator is not called `pow`, to
  * avoid shadowing the math.h function. */
 static double dpow_ui(double x, integer n) {
 	double result=1.0; unsigned long int bits;
 	if(n != 0) {
 		bits = n;
 		if(n < 0) bits = 0UL - bits, x = 1/x;
 		for( ; ; ) {
 			if(bits & 01) result *= x;
 			if(bits >>= 1) x *= x;
 			else break;
 		}
 	}
 	return result;
 }
 /* cpow_ui: complex x raised to the integer power n by repeated squaring.
  *   n == 0 gives 1; n < 0 is evaluated as (1/x) ** |n|.
  * |n| is formed in unsigned arithmetic, so the most negative integer no
  * longer overflows on negation.  The accumulator is not called `pow`, to
  * avoid shadowing the math.h function. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result=1.0; unsigned long int bits;
 	if(n != 0) {
 		bits = n;
 		if(n < 0) bits = 0UL - bits, x = 1/x;
 		for( ; ; ) {
 			if(bits & 01) result *= x;
 			if(bits >>= 1) x *= x;
 			else break;
 		}
 	}
 	return result;
 }
 /* zpow_ui: complex x raised to the integer power n by repeated squaring.
  *   n == 0 gives 1; n < 0 is evaluated as (1/x) ** |n|.
  * |n| is formed in unsigned arithmetic, so the most negative integer no
  * longer overflows on negation.  The accumulator is not called `pow`, to
  * avoid shadowing the math.h function. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result=1.0; unsigned long int bits;
 	if(n != 0) {
 		bits = n;
 		if(n < 0) bits = 0UL - bits, x = 1/x;
 		for( ; ; ) {
 			if(bits & 01) result *= x;
 			if(bits >>= 1) x *= x;
 			else break;
 		}
 	}
 	return result;
 }
 /* pow_ii: integer x raised to the integer power n.
  *   n == 0 or x == 1       -> 1
  *   n < 0 and x == -1      -> (-1) ** |n|
  *   n < 0 and x == 0       -> evaluates 1/x (integer division by zero),
  *                             exactly as before
  *   n < 0, any other x     -> 0
  *   n > 0                  -> repeated squaring
  * |n| is formed in unsigned arithmetic, so x == -1 with the most negative
  * n no longer overflows on negation. */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1; unsigned long int bits;
 	if (n == 0 || x == 1) return 1;
 	bits = n;
 	if (n < 0) {
 		if (x != -1) return x == 0 ? 1/x : 0;
 		bits = 0UL - bits;
 	}
 	for( ; ; ) {
 		if(bits & 01) result *= x;
 		if(bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* dmaxloc_: position of the largest value in w(s..e), 1-based indices.
  * The result is counted from 1 relative to s; on ties the first
  * occurrence is kept.  w(s) is read unconditionally; n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s, k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* smaxloc_: position of the largest value in w(s..e), 1-based indices.
  * The result is counted from 1 relative to s; on ties the first
  * occurrence is kept.  w(s) is read unconditionally; n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s, k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* cdotc_: conjugated single-complex dot product, *z = sum_k conj(x(k)) * y(k).
  *
  *   z             (out) receives the sum; set to 0 when n <= 0.
  *   n_            (in)  number of element pairs to combine.
  *   x, y          (in)  the two vectors.
  *   incx_, incy_  (in)  storage spacing between consecutive elements.
  *
  * A negative increment walks that vector backwards starting from offset
  * (1-n)*inc, the reference BLAS convention, rather than indexing in front
  * of the array. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float cdotc = 0.0;
 	if (n > 0) {
 		/* Start offsets: 0 for forward strides, far end for negative ones. */
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* cdotc = cdotc + conjg(x(i)) * y(i) */
 		cdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = cdotc;
 }
 /* zdotc_: conjugated double-complex dot product, *z = sum_k conj(x(k)) * y(k).
  *
  *   z             (out) receives the sum; set to 0 when n <= 0.
  *   n_            (in)  number of element pairs to combine.
  *   x, y          (in)  the two vectors.
  *   incx_, incy_  (in)  storage spacing between consecutive elements.
  *
  * A negative increment walks that vector backwards starting from offset
  * (1-n)*inc, the reference BLAS convention, rather than indexing in front
  * of the array. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotc = 0.0;
 	if (n > 0) {
 		/* Start offsets: 0 for forward strides, far end for negative ones. */
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = zdotc;
 }
 /* cdotu_: unconjugated single-complex dot product, *z = sum_k x(k) * y(k).
  *
  *   z             (out) receives the sum; set to 0 when n <= 0.
  *   n_            (in)  number of element pairs to combine.
  *   x, y          (in)  the two vectors.
  *   incx_, incy_  (in)  storage spacing between consecutive elements.
  *
  * A negative increment walks that vector backwards starting from offset
  * (1-n)*inc, the reference BLAS convention, rather than indexing in front
  * of the array. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float cdotu = 0.0;
 	if (n > 0) {
 		/* Start offsets: 0 for forward strides, far end for negative ones. */
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* cdotu = cdotu + x(i) * y(i) */
 		cdotu += Cf(&x[ix]) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = cdotu;
 }
 /* zdotu_: unconjugated double-complex dot product, *z = sum_k x(k) * y(k).
  *
  *   z             (out) receives the sum; set to 0 when n <= 0.
  *   n_            (in)  number of element pairs to combine.
  *   x, y          (in)  the two vectors.
  *   incx_, incy_  (in)  storage spacing between consecutive elements.
  *
  * A negative increment walks that vector backwards starting from offset
  * (1-n)*inc, the reference BLAS convention, rather than indexing in front
  * of the array. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotu = 0.0;
 	if (n > 0) {
 		/* Start offsets: 0 for forward strides, far end for negative ones. */
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotu = zdotu + x(i) * y(i) */
 		zdotu += Cd(&x[ix]) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = zdotu;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b SGEHD2 reduces a general square matrix to upper Hessenberg form using an unblocked algorithm. 
 */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEHD2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgehd2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgehd2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgehd2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) */

 /*       INTEGER            IHI, ILO, INFO, LDA, N */
 /*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEHD2 reduces a real general matrix A to upper Hessenberg form H by */
 /* > an orthogonal similarity transformation:  Q**T * A * Q = H . */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ILO */
 /* > \verbatim */
 /* >          ILO is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IHI */
 /* > \verbatim */
 /* >          IHI is INTEGER */
 /* > */
 /* >          It is assumed that A is already upper triangular in rows */
 /* >          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */
 /* >          set by a previous call to SGEBAL; otherwise they should be */
 /* >          set to 1 and N respectively. See Further Details. */
 /* >          1 <= ILO <= IHI <= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the n by n general matrix to be reduced. */
 /* >          On exit, the upper triangle and the first subdiagonal of A */
 /* >          are overwritten with the upper Hessenberg matrix H, and the */
 /* >          elements below the first subdiagonal, with the array TAU, */
 /* >          represent the orthogonal matrix Q as a product of elementary */
 /* >          reflectors. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is REAL array, dimension (N-1) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of (ihi-ilo) elementary */
 /* >  reflectors */
 /* > */
 /* >     Q = H(ilo) H(ilo+1) . . . H(ihi-1). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */
 /* >  exit in A(i+2:ihi,i), and tau in TAU(i). */
 /* > */
 /* >  The contents of A are illustrated by the following example, with */
 /* >  n = 7, ilo = 2 and ihi = 6: */
 /* > */
 /* >  on entry,                        on exit, */
 /* > */
 /* >  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a ) */
 /* >  (     a   a   a   a   a   a )    (      a   h   h   h   h   a ) */
 /* >  (     a   a   a   a   a   a )    (      h   h   h   h   h   h ) */
 /* >  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h ) */
 /* >  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h ) */
 /* >  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h ) */
 /* >  (                         a )    (                          a ) */
 /* > */
 /* >  where a denotes an element of the original matrix A, h denotes a */
 /* >  modified element of the upper Hessenberg matrix H, and vi denotes an */
 /* >  element of the vector defining H(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int sgehd2_(integer *n, integer *ilo, integer *ihi, real *a, 
 	integer *lda, real *tau, real *work, integer *info)
 {
     extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, 
 	    integer *, real *, real *, integer *, real *), xerbla_(
 	    char *, integer *, ftnlen), slarfg_(integer *, real *, real *, 
 	    integer *, real *);

     /* Column stride of A.  A, TAU and WORK are addressed 0-based here, so */
     /* the 1-based element A(i,j) lives at a[(i-1) + (j-1) * ld]. */
     const integer ld = *lda;
     integer k, last, argpos;
     integer vlen, tail_row, right_cols, left_rows, left_cols;
     real saved;
     real *v, *scalar;

 /*     Validate the arguments; xerbla_ is told the offending position. */

     *info = 0;
     if (*n < 0) {
         *info = -1;
     } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) {
         *info = -2;
     } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) {
         *info = -3;
     } else if (*lda < f2cmax(1,*n)) {
         *info = -5;
     }
     if (*info != 0) {
         argpos = -(*info);
         xerbla_("SGEHD2", &argpos, (ftnlen)6);
         return 0;
     }

 /*     k runs over the 1-based column indices ILO .. IHI-1. */

     last = *ihi - 1;
     for (k = *ilo; k <= last; ++k) {

         v = &a[k + (k - 1) * ld];	/* A(k+1,k) */
         scalar = &tau[k - 1];		/* TAU(k)   */

 /*        Compute elementary reflector H(k) to annihilate A(k+2:ihi,k) */

         vlen = *ihi - k;
         tail_row = k + 2;
         tail_row = f2cmin(tail_row,*n);
         slarfg_(&vlen, v, &a[(tail_row - 1) + (k - 1) * ld], &c__1, scalar);

 /*        Put 1 in A(k+1,k) while the reflector is applied; the value */
 /*        left there by slarfg_ is restored at the end of the iteration. */

         saved = *v;
         *v = 1.f;

 /*        Apply H(k) to A(1:ihi,k+1:ihi) from the right */

         right_cols = *ihi - k;
         slarf_("Right", ihi, &right_cols, v, &c__1, scalar, &a[k * ld], lda,
                 work);

 /*        Apply H(k) to A(k+1:ihi,k+1:n) from the left */

         left_rows = *ihi - k;
         left_cols = *n - k;
         slarf_("Left", &left_rows, &left_cols, v, &c__1, scalar,
                 &a[k + k * ld], lda, work);

         *v = saved;
     }

     return 0;

 /*     End of SGEHD2 */

 } /* sgehd2_ */

--- a/lapack-netlib/SRC/sgehrd.c
+++ b/lapack-netlib/SRC/sgehrd.c
@@ -0,0 +1,782 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Read an f2c single-precision complex struct as a C _Complex float value. */
 static inline _Complex float Cf(complex *z)
 {
 	return z->r + z->i*_Complex_I;
 }

 /* Read an f2c double-precision complex struct as a C _Complex double value. */
 static inline _Complex double Cd(doublecomplex *z)
 {
 	return z->r + z->i*_Complex_I;
 }

 /* View the storage of *z through a _Complex float pointer (no copy is made). */
 static inline _Complex float * _pCf(complex *z)
 {
 	return (_Complex float*)z;
 }

 /* View the storage of *z through a _Complex double pointer (no copy is made). */
 static inline _Complex double * _pCd(doublecomplex *z)
 {
 	return (_Complex double*)z;
 }
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /*
  * spow_ui -- raise a single-precision real to an integer power.
  *
  * x: base.  n: exponent; when negative, 1/x is raised to |n|.
  * Returns x**n computed by repeated squaring; n == 0 yields 1.
  */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	/* |n| is formed in unsigned arithmetic: negating n as a signed value
 	   overflows (undefined behaviour) for the most negative integer. */
 	unsigned long int u = (unsigned long int) n;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - u;
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		u >>= 1;
 		if (u == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * dpow_ui -- raise a double-precision real to an integer power.
  *
  * x: base.  n: exponent; when negative, 1/x is raised to |n|.
  * Returns x**n computed by repeated squaring; n == 0 yields 1.
  */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	/* |n| is formed in unsigned arithmetic: negating n as a signed value
 	   overflows (undefined behaviour) for the most negative integer. */
 	unsigned long int u = (unsigned long int) n;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - u;
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		u >>= 1;
 		if (u == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * cpow_ui -- raise a single-precision complex value to an integer power.
  *
  * x: base.  n: exponent; when negative, 1/x is raised to |n|.
  * Returns x**n computed by repeated squaring; n == 0 yields 1.
  */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	/* |n| is formed in unsigned arithmetic: negating n as a signed value
 	   overflows (undefined behaviour) for the most negative integer. */
 	unsigned long int u = (unsigned long int) n;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - u;
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		u >>= 1;
 		if (u == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * zpow_ui -- raise a double-precision complex value to an integer power.
  *
  * x: base.  n: exponent; when negative, 1/x is raised to |n|.
  * Returns x**n computed by repeated squaring; n == 0 yields 1.
  */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	/* |n| is formed in unsigned arithmetic: negating n as a signed value
 	   overflows (undefined behaviour) for the most negative integer. */
 	unsigned long int u = (unsigned long int) n;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - u;
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		u >>= 1;
 		if (u == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * pow_ii -- integer base raised to an integer power, in integer arithmetic.
  *
  * Returns 1 when n == 0 or x == 1.  For n < 0 the result is +1 or -1 when
  * x == -1 (by the parity of n) and 0 for any other non-zero x; for x == 0
  * the expression 1/x is still evaluated, i.e. an integer division by zero,
  * exactly as before.  For n > 0 the power is formed by repeated squaring.
  */
 static integer pow_ii(integer x, integer n) {
 	integer result;
 	unsigned long int u;
 	if (n == 0 || x == 1) return 1;
 	if (n < 0) {
 		/* (-1)**n depends only on the parity of n; testing the parity
 		   avoids negating n, which overflows (undefined behaviour) for
 		   the most negative integer. */
 		if (x == -1) return (n % 2 != 0) ? -1 : 1;
 		return x == 0 ? 1/x : 0;
 	}
 	u = n;
 	for (result = 1; ; ) {
 		if (u & 01) result *= x;
 		u >>= 1;
 		if (u == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * dmaxloc_ -- position of the largest entry among w[s-1] .. w[e-1].
  *
  * The returned position is 1-based and counted from s (so the answer is 1
  * when w[s-1] itself is the largest); on ties the earliest entry wins.
  * w[s-1] is always read, and the argument n is not used.
  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double largest = w[s-1];
 	integer k = s + 1;
 	while (k <= e) {
 		if (w[k-1] > largest) {
 			best = k;
 			largest = w[k-1];
 		}
 		k++;
 	}
 	return best - s + 1;
 }
 /*
  * smaxloc_ -- position of the largest entry among w[s-1] .. w[e-1].
  *
  * The returned position is 1-based and counted from s (so the answer is 1
  * when w[s-1] itself is the largest); on ties the earliest entry wins.
  * w[s-1] is always read, and the argument n is not used.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float largest = w[s-1];
 	integer k = s + 1;
 	while (k <= e) {
 		if (w[k-1] > largest) {
 			best = k;
 			largest = w[k-1];
 		}
 		k++;
 	}
 	return best - s + 1;
 }
 /*
  * cdotc_ -- conjugated dot product of two single-precision complex vectors.
  *
  * Stores sum_k conj(x_k) * y_k in *z, where *n_ elements are taken from x
  * with stride *incx_ and from y with stride *incy_.  Nothing is accumulated
  * (and *z is set to 0) when *n_ <= 0.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else if (n > 0) {
 		/* As in the reference BLAS, a negative stride walks the vector
 		   backwards starting from element (1-n)*inc, so that every index
 		   stays inside the array instead of running before its start. */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(ix)) * y(iy) */
 			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /*
  * zdotc_ -- conjugated dot product of two double-precision complex vectors.
  *
  * Stores sum_k conj(x_k) * y_k in *z, where *n_ elements are taken from x
  * with stride *incx_ and from y with stride *incy_.  Nothing is accumulated
  * (and *z is set to 0) when *n_ <= 0.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else if (n > 0) {
 		/* As in the reference BLAS, a negative stride walks the vector
 		   backwards starting from element (1-n)*inc, so that every index
 		   stays inside the array instead of running before its start. */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(ix)) * y(iy) */
 			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }
 /*
  * cdotu_ -- unconjugated dot product of two single-precision complex vectors.
  *
  * Stores sum_k x_k * y_k in *z, where *n_ elements are taken from x with
  * stride *incx_ and from y with stride *incy_.  Nothing is accumulated
  * (and *z is set to 0) when *n_ <= 0.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else if (n > 0) {
 		/* As in the reference BLAS, a negative stride walks the vector
 		   backwards starting from element (1-n)*inc, so that every index
 		   stays inside the array instead of running before its start. */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(ix) * y(iy) */
 			zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /*
  * zdotu_ -- unconjugated dot product of two double-precision complex vectors.
  *
  * Stores sum_k x_k * y_k in *z, where *n_ elements are taken from x with
  * stride *incx_ and from y with stride *incy_.  Nothing is accumulated
  * (and *z is set to 0) when *n_ <= 0.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else if (n > 0) {
 		/* As in the reference BLAS, a negative stride walks the vector
 		   backwards starting from element (1-n)*inc, so that every index
 		   stays inside the array instead of running before its start. */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(ix) * y(iy) */
 			zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;
 static integer c__65 = 65;
 static real c_b25 = -1.f;
 static real c_b26 = 1.f;

 /* > \brief \b SGEHRD */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEHRD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgehrd.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgehrd.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgehrd.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) */

 /*       INTEGER            IHI, ILO, INFO, LDA, LWORK, N */
 /*       REAL              A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEHRD reduces a real general matrix A to upper Hessenberg form H by */
 /* > an orthogonal similarity transformation:  Q**T * A * Q = H . */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ILO */
 /* > \verbatim */
 /* >          ILO is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IHI */
 /* > \verbatim */
 /* >          IHI is INTEGER */
 /* > */
 /* >          It is assumed that A is already upper triangular in rows */
 /* >          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */
 /* >          set by a previous call to SGEBAL; otherwise they should be */
 /* >          set to 1 and N respectively. See Further Details. */
 /* >          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the N-by-N general matrix to be reduced. */
 /* >          On exit, the upper triangle and the first subdiagonal of A */
 /* >          are overwritten with the upper Hessenberg matrix H, and the */
 /* >          elements below the first subdiagonal, with the array TAU, */
 /* >          represent the orthogonal matrix Q as a product of elementary */
 /* >          reflectors. See Further Details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is REAL array, dimension (N-1) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to */
 /* >          zero. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (LWORK) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The length of the array WORK.  LWORK >= max(1,N). */
 /* >          For good performance, LWORK should generally be larger. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of (ihi-ilo) elementary */
 /* >  reflectors */
 /* > */
 /* >     Q = H(ilo) H(ilo+1) . . . H(ihi-1). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */
 /* >  exit in A(i+2:ihi,i), and tau in TAU(i). */
 /* > */
 /* >  The contents of A are illustrated by the following example, with */
 /* >  n = 7, ilo = 2 and ihi = 6: */
 /* > */
 /* >  on entry,                        on exit, */
 /* > */
 /* >  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a ) */
 /* >  (     a   a   a   a   a   a )    (      a   h   h   h   h   a ) */
 /* >  (     a   a   a   a   a   a )    (      h   h   h   h   h   h ) */
 /* >  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h ) */
 /* >  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h ) */
 /* >  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h ) */
 /* >  (                         a )    (                          a ) */
 /* > */
 /* >  where a denotes an element of the original matrix A, h denotes a */
 /* >  modified element of the upper Hessenberg matrix H, and vi denotes an */
 /* >  element of the vector defining H(i). */
 /* > */
 /* >  This file is a slight modification of LAPACK-3.0's SGEHRD */
 /* >  subroutine incorporating improvements proposed by Quintana-Orti and */
 /* >  Van de Geijn (2006). (See SLAHR2.) */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int sgehrd_(integer *n, integer *ilo, integer *ihi, real *a, 
 	integer *lda, real *tau, real *work, integer *lwork, integer *info)
 {
     /* System generated locals */
     integer a_dim1, a_offset, i__1, i__2, i__3, i__4;

     /* Local variables */
     integer i__, j, nbmin, iinfo;
     extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
 	    integer *, real *, real *, integer *, real *, integer *, real *, 
 	    real *, integer *), strmm_(char *, char *, char *,
 	     char *, integer *, integer *, real *, real *, integer *, real *, 
 	    integer *), saxpy_(integer *, 
 	    real *, real *, integer *, real *, integer *), sgehd2_(integer *, 
 	    integer *, integer *, real *, integer *, real *, real *, integer *
 	    ), slahr2_(integer *, integer *, integer *, real *, integer *, 
 	    real *, real *, integer *, real *, integer *);
     integer ib;
     real ei;
     integer nb, nh, nx;
     extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, real *, integer *, real *, 
 	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *,ftnlen);
     extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
     integer ldwork, lwkopt;
     logical lquery;
     integer iwt;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters */

     /* Parameter adjustments */
 /*     Shift the pointers so that the 1-based, column-major indexing used */
 /*     below (a[i + j*a_dim1], tau[i], work[i]) lands on the caller's */
 /*     0-based storage. */
     a_dim1 = *lda;
     a_offset = 1 + a_dim1 * 1;
     a -= a_offset;
     --tau;
     --work;

     /* Function Body */
     *info = 0;
 /*     lwork == -1 is a workspace query: only work[1] is filled in. */
     lquery = *lwork == -1;
 /*     info is set to minus the position of the first offending argument. */
     if (*n < 0) {
 	*info = -1;
     } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) {
 	*info = -2;
     } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) {
 	*info = -3;
     } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
     } else if (*lwork < f2cmax(1,*n) && ! lquery) {
 	*info = -8;
     }

     if (*info == 0) {

 /*       Compute the workspace requirements */
 /*       The block size is capped at 64.  The optimal workspace is n*nb */
 /*       entries plus 4160 (= 65 * 64) further entries; the latter start */
 /*       at work[iwt] below and are handed to slahr2_/slarfb_ with */
 /*       leading dimension 65 (c__65). */

 /* Computing MIN */
 	i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nb = f2cmin(i__1,i__2);
 	lwkopt = *n * nb + 4160;
 	work[1] = (real) lwkopt;
     }

 /*     Report an invalid argument through xerbla_ (with the positive */
 /*     argument position), or stop here if only the query was requested. */
     if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("SGEHRD", &i__1, (ftnlen)6);
 	return 0;
     } else if (lquery) {
 	return 0;
     }

 /*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */

     i__1 = *ilo - 1;
     for (i__ = 1; i__ <= i__1; ++i__) {
 	tau[i__] = 0.f;
 /* L10: */
     }
     i__1 = *n - 1;
     for (i__ = f2cmax(1,*ihi); i__ <= i__1; ++i__) {
 	tau[i__] = 0.f;
 /* L20: */
     }

 /*     Quick return if possible */
 /*     (an active block of at most one row/column needs no reduction) */

     nh = *ihi - *ilo + 1;
     if (nh <= 1) {
 	work[1] = 1.f;
 	return 0;
     }

 /*     Determine the block size */

 /* Computing MIN */
     i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, (
 	    ftnlen)6, (ftnlen)1);
     nb = f2cmin(i__1,i__2);
     nbmin = 2;
     if (nb > 1 && nb < nh) {

 /*        Determine when to cross over from blocked to unblocked code */
 /*        (last block is always handled by unblocked code) */

 /* Computing MAX */
 	i__1 = nb, i__2 = ilaenv_(&c__3, "SGEHRD", " ", n, ilo, ihi, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);
 	if (nx < nh) {

 /*           Determine if workspace is large enough for blocked code */

 	    if (*lwork < *n * nb + 4160) {

 /*              Not enough workspace to use optimal NB:  determine the */
 /*              minimum value of NB, and reduce NB or force use of */
 /*              unblocked code */

 /* Computing MAX */
 		i__1 = 2, i__2 = ilaenv_(&c__2, "SGEHRD", " ", n, ilo, ihi, &
 			c_n1, (ftnlen)6, (ftnlen)1);
 		nbmin = f2cmax(i__1,i__2);
 		if (*lwork >= *n * nbmin + 4160) {
 /*                 Largest nb that still fits beside the 4160 entries. */
 		    nb = (*lwork - 4160) / *n;
 		} else {
 /*                 nb = 1 is below nbmin, which selects unblocked code. */
 		    nb = 1;
 		}
 	    }
 	}
     }
     ldwork = *n;

     if (nb < nbmin || nb >= nh) {

 /*        Use unblocked code below */

 	i__ = *ilo;

     } else {

 /*        Use blocked code */
 /*        This branch requires 2 <= nbmin <= nb < nh, so the block above */
 /*        that assigns nx has been executed.  iwt is the 1-based start of */
 /*        the 4160-entry area that follows the n*nb block of work. */

 	iwt = *n * nb + 1;
 	i__1 = *ihi - 1 - nx;
 	i__2 = nb;
 /*        f2c's generic DO-loop form: the direction test depends on the */
 /*        sign of the step i__2, which is nb here. */
 	for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
 /* Computing MIN */
 	    i__3 = nb, i__4 = *ihi - i__;
 	    ib = f2cmin(i__3,i__4);

 /*           Reduce columns i:i+ib-1 to Hessenberg form, returning the */
 /*           matrices V and T of the block reflector H = I - V*T*V**T */
 /*           which performs the reduction, and also the matrix Y = A*V*T */

 	    slahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], &
 		    work[iwt], &c__65, &work[1], &ldwork);

 /*           Apply the block reflector H to A(1:ihi,i+ib:ihi) from the */
 /*           right, computing  A := A - Y * V**T. V(i+ib,ib-1) must be set */
 /*           to 1 */
 /*           The entry of A that is overwritten with 1 is saved in ei and */
 /*           put back right after the sgemm_ call. */

 	    ei = a[i__ + ib + (i__ + ib - 1) * a_dim1];
 	    a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.f;
 	    i__3 = *ihi - i__ - ib + 1;
 	    sgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b25, &
 		    work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &
 		    c_b26, &a[(i__ + ib) * a_dim1 + 1], lda);
 	    a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei;

 /*           Apply the block reflector H to A(1:i,i+1:i+ib-1) from the */
 /*           right */
 /*           strmm_ updates the leading columns of work in place; each of */
 /*           those columns is then subtracted (c_b25 = -1) from the */
 /*           matching column of A by saxpy_. */

 	    i__3 = ib - 1;
 	    strmm_("Right", "Lower", "Transpose", "Unit", &i__, &i__3, &c_b26,
 		     &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork);
 	    i__3 = ib - 2;
 	    for (j = 0; j <= i__3; ++j) {
 		saxpy_(&i__, &c_b25, &work[ldwork * j + 1], &c__1, &a[(i__ + 
 			j + 1) * a_dim1 + 1], &c__1);
 /* L30: */
 	    }

 /*           Apply the block reflector H to A(i+1:ihi,i+ib:n) from the */
 /*           left */

 	    i__3 = *ihi - i__;
 	    i__4 = *n - i__ - ib + 1;
 	    slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
 		    i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, &work[iwt], &
 		    c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &
 		    ldwork);
 /* L40: */
 	}
     }

 /*     Use unblocked code to reduce the rest of the matrix */
 /*     i__ is passed as the ILO argument of sgehd2_: it equals *ilo when */
 /*     the blocked loop was skipped, otherwise the first column the loop */
 /*     did not process.  The status returned in iinfo is not examined. */

     sgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
 /*     Report the optimal workspace size computed during argument checking. */
     work[1] = (real) lwkopt;

     return 0;

 /*     End of SGEHRD */

 } /* sgehrd_ */

--- a/lapack-netlib/SRC/sgejsv.c
+++ b/lapack-netlib/SRC/sgejsv.c
--- a/lapack-netlib/SRC/sgelq.c
+++ b/lapack-netlib/SRC/sgelq.c
@@ -0,0 +1,742 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Read an f2c single-precision complex struct as a C _Complex float value. */
 static inline _Complex float Cf(complex *z)
 {
 	return z->r + z->i*_Complex_I;
 }

 /* Read an f2c double-precision complex struct as a C _Complex double value. */
 static inline _Complex double Cd(doublecomplex *z)
 {
 	return z->r + z->i*_Complex_I;
 }

 /* View the storage of *z through a _Complex float pointer (no copy is made). */
 static inline _Complex float * _pCf(complex *z)
 {
 	return (_Complex float*)z;
 }

 /* View the storage of *z through a _Complex double pointer (no copy is made). */
 static inline _Complex double * _pCd(doublecomplex *z)
 {
 	return (_Complex double*)z;
 }
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /*
  * spow_ui -- raise a single-precision real to an integer power.
  *
  * x: base.  n: exponent; when negative, 1/x is raised to |n|.
  * Returns x**n computed by repeated squaring; n == 0 yields 1.
  */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	/* |n| is formed in unsigned arithmetic: negating n as a signed value
 	   overflows (undefined behaviour) for the most negative integer. */
 	unsigned long int u = (unsigned long int) n;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - u;
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		u >>= 1;
 		if (u == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * dpow_ui -- raise a double-precision real to an integer power.
  *
  * x: base.  n: exponent; when negative, 1/x is raised to |n|.
  * Returns x**n computed by repeated squaring; n == 0 yields 1.
  */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	/* |n| is formed in unsigned arithmetic: negating n as a signed value
 	   overflows (undefined behaviour) for the most negative integer. */
 	unsigned long int u = (unsigned long int) n;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - u;
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		u >>= 1;
 		if (u == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * cpow_ui -- raise a single-precision complex value to an integer power.
  *
  * x: base.  n: exponent; when negative, 1/x is raised to |n|.
  * Returns x**n computed by repeated squaring; n == 0 yields 1.
  */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	/* |n| is formed in unsigned arithmetic: negating n as a signed value
 	   overflows (undefined behaviour) for the most negative integer. */
 	unsigned long int u = (unsigned long int) n;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - u;
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		u >>= 1;
 		if (u == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * zpow_ui -- raise a double-precision complex value to an integer power.
  *
  * x: base.  n: exponent; when negative, 1/x is raised to |n|.
  * Returns x**n computed by repeated squaring; n == 0 yields 1.
  */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	/* |n| is formed in unsigned arithmetic: negating n as a signed value
 	   overflows (undefined behaviour) for the most negative integer. */
 	unsigned long int u = (unsigned long int) n;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - u;
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		u >>= 1;
 		if (u == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * pow_ii -- integer base raised to an integer power, in integer arithmetic.
  *
  * Returns 1 when n == 0 or x == 1.  For n < 0 the result is +1 or -1 when
  * x == -1 (by the parity of n) and 0 for any other non-zero x; for x == 0
  * the expression 1/x is still evaluated, i.e. an integer division by zero,
  * exactly as before.  For n > 0 the power is formed by repeated squaring.
  */
 static integer pow_ii(integer x, integer n) {
 	integer result;
 	unsigned long int u;
 	if (n == 0 || x == 1) return 1;
 	if (n < 0) {
 		/* (-1)**n depends only on the parity of n; testing the parity
 		   avoids negating n, which overflows (undefined behaviour) for
 		   the most negative integer. */
 		if (x == -1) return (n % 2 != 0) ? -1 : 1;
 		return x == 0 ? 1/x : 0;
 	}
 	u = n;
 	for (result = 1; ; ) {
 		if (u & 01) result *= x;
 		u >>= 1;
 		if (u == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * dmaxloc_ -- position of the largest entry among w[s-1] .. w[e-1].
  *
  * The returned position is 1-based and counted from s (so the answer is 1
  * when w[s-1] itself is the largest); on ties the earliest entry wins.
  * w[s-1] is always read, and the argument n is not used.
  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double largest = w[s-1];
 	integer k = s + 1;
 	while (k <= e) {
 		if (w[k-1] > largest) {
 			best = k;
 			largest = w[k-1];
 		}
 		k++;
 	}
 	return best - s + 1;
 }
 /*
  * smaxloc_: position of the largest value in w(s..e), using 1-based indices.
  *
  *   w     array; element k (1-based) is w[k-1]
  *   s, e  first and last 1-based index of the range that is scanned
  *   n     not used
  *
  * Returns the position relative to s (1 means w(s)); on ties the earliest
  * element wins because only a strictly larger value replaces the current one.
  * w(s) is read even when e < s.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float peak = w[s - 1];
 	integer k = s + 1;

 	while (k <= e) {
 		if (w[k - 1] > peak) {
 			best = k;
 			peak = w[k - 1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /*
  * cdotc_: conjugated dot product, *z = sum of conj(x_k) * y_k over n elements.
  *
  *   z            receives the result
  *   n_           number of elements; n <= 0 stores 0
  *   x, incx_     first vector and its stride, in elements
  *   y, incy_     second vector and its stride, in elements
  *
  * A negative stride walks its vector backwards starting at offset
  * (1-n)*inc, the reference BLAS convention, so no element in front of the
  * array is touched.  Non-negative strides index exactly as before.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float acc = 0.0;

 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = acc;
 }
 /*
  * zdotc_: conjugated dot product, *z = sum of conj(x_k) * y_k over n elements,
  * in double precision.
  *
  *   z            receives the result
  *   n_           number of elements; n <= 0 stores 0
  *   x, incx_     first vector and its stride, in elements
  *   y, incy_     second vector and its stride, in elements
  *
  * A negative stride walks its vector backwards starting at offset
  * (1-n)*inc, the reference BLAS convention, so no element in front of the
  * array is touched.  Non-negative strides index exactly as before.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double acc = 0.0;

 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = acc;
 }
 /*
  * cdotu_: unconjugated dot product, *z = sum of x_k * y_k over n elements.
  *
  *   z            receives the result
  *   n_           number of elements; n <= 0 stores 0
  *   x, incx_     first vector and its stride, in elements
  *   y, incy_     second vector and its stride, in elements
  *
  * A negative stride walks its vector backwards starting at offset
  * (1-n)*inc, the reference BLAS convention, so no element in front of the
  * array is touched.  Non-negative strides index exactly as before.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float acc = 0.0;

 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			acc += Cf(&x[ix]) * Cf(&y[iy]);	/* no conjugation */
 		}
 	}
 	pCf(z) = acc;
 }
 /*
  * zdotu_: unconjugated dot product, *z = sum of x_k * y_k over n elements,
  * in double precision.
  *
  *   z            receives the result
  *   n_           number of elements; n <= 0 stores 0
  *   x, incx_     first vector and its stride, in elements
  *   y, incy_     second vector and its stride, in elements
  *
  * A negative stride walks its vector backwards starting at offset
  * (1-n)*inc, the reference BLAS convention, so no element in front of the
  * array is touched.  Non-negative strides index exactly as before.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double acc = 0.0;

 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			acc += Cd(&x[ix]) * Cd(&y[iy]);	/* no conjugation */
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* Addressable integer constants: sgelq_ passes them by pointer to ilaenv_,
    whose arguments are all pointers. */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__2 = 2;

 /* > \brief \b SGELQ */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK, */
 /*                         INFO ) */

 /*       INTEGER           INFO, LDA, M, N, TSIZE, LWORK */
 /*       REAL              A( LDA, * ), T( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGELQ computes an LQ factorization of a real M-by-N matrix A: */
 /* > */
 /* >    A = ( L 0 ) *  Q */
 /* > */
 /* > where: */
 /* > */
 /* >    Q is a N-by-N orthogonal matrix; */
 /* >    L is a lower-triangular M-by-M matrix; */
 /* >    0 is a M-by-(N-M) zero matrix, if M < N. */
 /* > */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and below the diagonal of the array */
 /* >          contain the M-by-f2cmin(M,N) lower trapezoidal matrix L */
 /* >          (L is lower triangular if M <= N); */
 /* >          the elements above the diagonal are used to store part of the */
 /* >          data structure to represent Q. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is REAL array, dimension (MAX(5,TSIZE)) */
 /* >          On exit, if INFO = 0, T(1) returns optimal (or either minimal */
 /* >          or optimal, if query is assumed) TSIZE. See TSIZE for details. */
 /* >          Remaining T contains part of the data structure used to represent Q. */
 /* >          If one wants to apply or construct Q, then one needs to keep T */
 /* >          (in addition to A) and pass it to further subroutines. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TSIZE */
 /* > \verbatim */
 /* >          TSIZE is INTEGER */
 /* >          If TSIZE >= 5, the dimension of the array T. */
 /* >          If TSIZE = -1 or -2, then a workspace query is assumed. The routine */
 /* >          only calculates the sizes of the T and WORK arrays, returns these */
 /* >          values as the first entries of the T and WORK arrays, and no error */
 /* >          message related to T or WORK is issued by XERBLA. */
 /* >          If TSIZE = -1, the routine calculates optimal size of T for the */
 /* >          optimum performance and returns this value in T(1). */
 /* >          If TSIZE = -2, the routine calculates minimal size of T and */
 /* >          returns this value in T(1). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          (workspace) REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */
 /* >          or optimal, if query was assumed) LWORK. */
 /* >          See LWORK for details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If LWORK = -1 or -2, then a workspace query is assumed. The routine */
 /* >          only calculates the sizes of the T and WORK arrays, returns these */
 /* >          values as the first entries of the T and WORK arrays, and no error */
 /* >          message related to T or WORK is issued by XERBLA. */
 /* >          If LWORK = -1, the routine calculates optimal size of WORK for the */
 /* >          optimal performance and returns this value in WORK(1). */
 /* >          If LWORK = -2, the routine calculates minimal size of WORK and */
 /* >          returns this value in WORK(1). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \par Further Details */
 /*  ==================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* > The goal of the interface is to give maximum freedom to the developers for */
 /* > creating any LQ factorization algorithm they wish. The triangular */
 /* > (trapezoidal) L has to be stored in the lower part of A. The upper part of A */
 /* > and the array T can be used to store any relevant information for applying or */
 /* > constructing the Q factor. The WORK array can safely be discarded after exit. */
 /* > */
 /* > Caution: One should not expect the sizes of T and WORK to be the same from one */
 /* > LAPACK implementation to the other, or even from one execution to the other. */
 /* > A workspace query (for T and WORK) is needed at each execution. However, */
 /* > for a given execution, the size of T and WORK are fixed and will not change */
 /* > from one query to the next. */
 /* > */
 /* > \endverbatim */
 /* > */
 /* > \par Further Details particular to this LAPACK implementation: */
 /*  ============================================================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* > These details are particular for this LAPACK implementation. Users should not */
 /* > take them for granted. These details may change in the future, and are not likely */
 /* > true for another LAPACK implementation. These details are relevant if one wants */
 /* > to try to understand the code. They are not part of the interface. */
 /* > */
 /* > In this version, */
 /* > */
 /* >          T(2): row block size (MB) */
 /* >          T(3): column block size (NB) */
 /* >          T(6:TSIZE): data structure needed for Q, computed by */
 /* >                           SLASWLQ or SGELQT */
 /* > */
 /* >  Depending on the matrix dimensions M and N, and row and column */
 /* >  block sizes MB and NB returned by ILAENV, SGELQ will use either */
 /* >  SLASWLQ (if the matrix is short-and-wide) or SGELQT to compute */
 /* >  the LQ factorization. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* SGELQ driver: validates the arguments, answers workspace queries through */
 /* T(1:3) and WORK(1), then factors A with sgelqt_ or slaswlq_.  Always */
 /* returns 0; the status is reported through *info. */
 /* Subroutine */ int sgelq_(integer *m, integer *n, real *a, integer *lda, 
 	real *t, integer *tsize, real *work, integer *lwork, integer *info)
 {
     /* External routines */
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
     extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
     extern /* Subroutine */ int sgelqt_(integer *, integer *, integer *, real 
 	    *, integer *, real *, integer *, real *, integer *);
     extern /* Subroutine */ int slaswlq_(integer *, integer *, integer *, 
 	    integer *, real *, integer *, real *, integer *, real *, integer *
 	    , integer *);

     /* Local variables */
     logical lquery, mint, minw, lminws, direct;
     integer mb, nb, nblcks, mintsz, tneed;
     integer lwmin, lwopt, lwreq, errpos;


 /*  -- LAPACK computational routine (version 3.9.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
 /*     November 2019 */


 /*  ===================================================================== */

 /*     Arrays are indexed from zero here: t[0..2] are T(1:3), t[5] is T(6), */
 /*     work[0] is WORK(1) and a points at A(1,1). */

     *info = 0;

 /*     A size of -1 or -2 in TSIZE or LWORK makes the call a workspace query */

     lquery = *tsize == -1 || *tsize == -2 || *lwork == -1 || *lwork == -2;

 /*     With a -2 present, minimal sizes are reported for every size */
 /*     argument that is not explicitly -1 */

     if (*tsize == -2 || *lwork == -2) {
 	mint = *tsize != -1;
 	minw = *lwork != -1;
     } else {
 	mint = FALSE_;
 	minw = FALSE_;
     }

 /*     Determine the block size */

     if (f2cmin(*m,*n) > 0) {
 	mb = ilaenv_(&c__1, "SGELQ ", " ", m, n, &c__1, &c_n1, (ftnlen)6, (
 		ftnlen)1);
 	nb = ilaenv_(&c__1, "SGELQ ", " ", m, n, &c__2, &c_n1, (ftnlen)6, (
 		ftnlen)1);
     } else {
 	mb = 1;
 	nb = *n;
     }
     if (mb < 1 || mb > f2cmin(*m,*n)) {
 	mb = 1;
     }
     if (nb <= *m || nb > *n) {
 	nb = *n;
     }
     mintsz = *m + 5;

 /*     Number of column blocks: ceiling of (N-M)/(NB-M) when blocking applies */

     nblcks = 1;
     if (nb > *m && *n > *m) {
 	integer span = *n - *m;
 	integer step = nb - *m;

 	nblcks = span / step;
 	if (span % step != 0) {
 	    ++nblcks;
 	}
     }

 /*     Determine if the workspace size satisfies minimal size */

     direct = *n <= *m || nb <= *m || nb >= *n;
     if (direct) {
 	lwmin = f2cmax(1,*n);
 	lwopt = f2cmax(1,mb * *n);
     } else {
 	lwmin = f2cmax(1,*m);
 	lwopt = f2cmax(1,mb * *m);
     }

 /*     Fall back to the smallest blocking when the caller supplied at least */
 /*     the minimal sizes but less than the preferred ones */

     lminws = FALSE_;
     tneed = f2cmax(1,mb * *m * nblcks + 5);
     if (! lquery && *lwork >= lwmin && *tsize >= mintsz && (*tsize < tneed 
 	    || *lwork < lwopt)) {
 	if (*tsize < tneed) {
 	    lminws = TRUE_;
 	    mb = 1;
 	    nb = *n;
 	}
 	if (*lwork < lwopt) {
 	    lminws = TRUE_;
 	    mb = 1;
 	}
     }

 /*     MB and NB may have changed: pick the algorithm and its workspace again */

     direct = *n <= *m || nb <= *m || nb >= *n;
     if (direct) {
 	lwreq = f2cmax(1,mb * *n);
     } else {
 	lwreq = f2cmax(1,mb * *m);
     }

 /*     Test the input arguments */

     if (*m < 0) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
     } else if (! lquery && ! lminws) {
 	tneed = f2cmax(1,mb * *m * nblcks + 5);
 	if (*tsize < tneed) {
 	    *info = -6;
 	} else if (*lwork < lwreq) {
 	    *info = -8;
 	}
     }

     if (*info != 0) {
 	errpos = -(*info);
 	xerbla_("SGELQ", &errpos, (ftnlen)5);
 	return 0;
     }

 /*     Report the sizes and the block sizes that were chosen */

     t[0] = (real) (mint ? mintsz : mb * *m * nblcks + 5);
     t[1] = (real) mb;
     t[2] = (real) nb;
     work[0] = (real) (minw ? lwmin : lwreq);
     if (lquery) {
 	return 0;
     }

 /*     Quick return if possible */

     if (f2cmin(*m,*n) == 0) {
 	return 0;
     }

 /*     The LQ Decomposition */

     if (direct) {
 	sgelqt_(m, n, &mb, a, lda, &t[5], &mb, work, info);
     } else {
 	slaswlq_(m, n, &mb, &nb, a, lda, &t[5], &mb, work, lwork, info);
     }

     work[0] = (real) lwreq;
     return 0;

 /*     End of SGELQ */

 } /* sgelq_ */

--- a/lapack-netlib/SRC/sgelq2.c
+++ b/lapack-netlib/SRC/sgelq2.c
@@ -0,0 +1,597 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* C spellings of the scalar types used by the translated routines. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex values as (real part, imaginary part) pairs. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd: build a C99 _Complex value from the r and i members of *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd: reinterpret the struct pointer as a pointer to _Complex, with */
 /* no copy.  NOTE(review): relies on the {r, i} struct having the same layout */
 /* as the corresponding _Complex type -- confirm for the target compilers. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf(z) / pCd(z): lvalue of _Complex type referring to the storage of *z. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Truth values stored in variables of type `logical`. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 /* Defaults to plain `extern` unless the includer defined it first. */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* One storage slot that can hold any of the scalar types declared above. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of one variable: a name, an address, a dims pointer and a type */
 /* code.  NOTE(review): the encoding of `dims` and `type` is not visible in */
 /* this file -- confirm against the consumer before use. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Generic arithmetic helpers.  Being macros, abs, f2cmin and f2cmax may */
 /* evaluate an argument more than once, so arguments must not have side */
 /* effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit helpers: test, clear or set bit number b of a. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro replacements for runtime helper routines.  Arguments are pointers: */
 /* scalar helpers dereference them with *(x), complex helpers read through */
 /* Cf()/Cd() and store through pCf()/pCd().  The brace-wrapped forms are */
 /* statements, not expressions. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 /* Both conjugate helpers call the double-precision conj(). */
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* Positive difference: a - b when a > b, otherwise 0. */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* Truncation toward zero. */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* Base-10 logarithm as log10(e) * ln(x). */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* Round to nearest, halves away from zero; takes a value, not a pointer. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* |a| carrying the sign of b (b >= 0 counts as positive). */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Powers: the *_ui helpers implement integer exponents. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): B is dereferenced with *(B) while A goes through Cd(); if B */
 /* is a doublecomplex pointer this hands a struct to cpow -- confirm. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* Compare at most min(c,d) characters of a and b with strncmp. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* Copy from B to A, stopping at min(C,D) characters or at the first NUL in */
 /* B; the remainder of A is left untouched. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* Both ignore their arguments and terminate the process: exit status 1 for */
 /* sig_die, 0 for s_stop. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shims: expand to a bare `break;` / `continue;` statement. */
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): myceiling, myhuge and mymaxloc wrap a single expression in */
 /* braces with no semicolon inside, so the expansion is only valid C as a */
 /* scalar initializer, not as an expression or a statement -- confirm the */
 /* intended usage before relying on them. */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 /* Disabled variant; both of its branches call dmaxloc_. */
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 /* Always forwards to the double-precision dmaxloc_, whatever w points to. */
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning `logical`.  The argument list is left
    unspecified: `(...)` when compiled as C++, an empty `()` list in C. */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /*
  * spow_ui: x raised to the integer power n, in single precision.
  *
  *   x  base
  *   n  exponent; n == 0 gives 1 for every x, n < 0 gives (1/x)**|n|
  *
  * Returns the product built by square-and-multiply over the bits of |n|.
  * |n| is formed in unsigned arithmetic so that the most negative exponent
  * does not depend on overflowing a signed negation.
  */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;

 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		bits = 0UL - (unsigned long int)n;	/* |n| without signed overflow */
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;		/* square only while higher bits remain */
 	}
 	return result;
 }
 /*
  * dpow_ui: x raised to the integer power n, in double precision.
  *
  *   x  base
  *   n  exponent; n == 0 gives 1 for every x, n < 0 gives (1/x)**|n|
  *
  * Returns the product built by square-and-multiply over the bits of |n|.
  * |n| is formed in unsigned arithmetic so that the most negative exponent
  * does not depend on overflowing a signed negation.
  */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;

 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		bits = 0UL - (unsigned long int)n;	/* |n| without signed overflow */
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;		/* square only while higher bits remain */
 	}
 	return result;
 }
 /*
  * cpow_ui: single-precision complex x raised to the integer power n.
  *
  *   x  base
  *   n  exponent; n == 0 gives 1 for every x, n < 0 gives (1/x)**|n|
  *
  * Returns the product built by square-and-multiply over the bits of |n|.
  * |n| is formed in unsigned arithmetic so that the most negative exponent
  * does not depend on overflowing a signed negation.
  */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;

 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		bits = 0UL - (unsigned long int)n;	/* |n| without signed overflow */
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;		/* square only while higher bits remain */
 	}
 	return result;
 }
 /*
  * zpow_ui: double-precision complex x raised to the integer power n.
  *
  *   x  base
  *   n  exponent; n == 0 gives 1 for every x, n < 0 gives (1/x)**|n|
  *
  * Returns the product built by square-and-multiply over the bits of |n|.
  * |n| is formed in unsigned arithmetic so that the most negative exponent
  * does not depend on overflowing a signed negation.
  */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;

 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		bits = 0UL - (unsigned long int)n;	/* |n| without signed overflow */
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;		/* square only while higher bits remain */
 	}
 	return result;
 }
 /*
  * pow_ii: integer power x**n with integer (truncating) results.
  *
  *   x  base
  *   n  exponent
  *
  * Returns
  *   1                 when n == 0 or x == 1,
  *   +1 or -1          when x == -1, by the parity of n,
  *   0                 when n < 0 and |x| > 1,
  *   x**n              when n > 0, by square-and-multiply.
  * For x == 0 with n < 0 the expression 1/x is evaluated on purpose, exactly as
  * before, so the call still faults instead of inventing a value.
  */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int bits;

 	if (n <= 0) {
 		if (n == 0 || x == 1) {
 			return 1;
 		}
 		if (x == -1) {
 			/* Parity test instead of negating n: -n overflows for the
 			   most negative integer. */
 			return (n % 2 != 0) ? -1 : 1;
 		}
 		return x == 0 ? 1/x : 0;
 	}
 	for (bits = (unsigned long int)n; ; ) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;		/* square only while higher bits remain */
 	}
 	return result;
 }
 /*
  * dmaxloc_: position of the largest value in w(s..e), using 1-based indices.
  *
  *   w     array; element k (1-based) is w[k-1]
  *   s, e  first and last 1-based index of the range that is scanned
  *   n     not used
  *
  * Returns the position relative to s (1 means w(s)); on ties the earliest
  * element wins because only a strictly larger value replaces the current one.
  * w(s) is read even when e < s.
  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double peak = w[s - 1];
 	integer k = s + 1;

 	while (k <= e) {
 		if (w[k - 1] > peak) {
 			best = k;
 			peak = w[k - 1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /*
  * smaxloc_: position of the largest value in w(s..e), using 1-based indices.
  *
  *   w     array; element k (1-based) is w[k-1]
  *   s, e  first and last 1-based index of the range that is scanned
  *   n     not used
  *
  * Returns the position relative to s (1 means w(s)); on ties the earliest
  * element wins because only a strictly larger value replaces the current one.
  * w(s) is read even when e < s.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float peak = w[s - 1];
 	integer k = s + 1;

 	while (k <= e) {
 		if (w[k - 1] > peak) {
 			best = k;
 			peak = w[k - 1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /*
  * cdotc_: conjugated dot product, *z = sum of conj(x_k) * y_k over n elements.
  *
  *   z            receives the result
  *   n_           number of elements; n <= 0 stores 0
  *   x, incx_     first vector and its stride, in elements
  *   y, incy_     second vector and its stride, in elements
  *
  * A negative stride walks its vector backwards starting at offset
  * (1-n)*inc, the reference BLAS convention, so no element in front of the
  * array is touched.  Non-negative strides index exactly as before.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float acc = 0.0;

 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = acc;
 }
 /*
  * zdotc_: conjugated dot product, *z = sum of conj(x_k) * y_k over n elements,
  * in double precision.
  *
  *   z            receives the result
  *   n_           number of elements; n <= 0 stores 0
  *   x, incx_     first vector and its stride, in elements
  *   y, incy_     second vector and its stride, in elements
  *
  * A negative stride walks its vector backwards starting at offset
  * (1-n)*inc, the reference BLAS convention, so no element in front of the
  * array is touched.  Non-negative strides index exactly as before.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double acc = 0.0;

 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = acc;
 }
 /*
  * cdotu_: unconjugated dot product, *z = sum of x_k * y_k over n elements.
  *
  *   z            receives the result
  *   n_           number of elements; n <= 0 stores 0
  *   x, incx_     first vector and its stride, in elements
  *   y, incy_     second vector and its stride, in elements
  *
  * A negative stride walks its vector backwards starting at offset
  * (1-n)*inc, the reference BLAS convention, so no element in front of the
  * array is touched.  Non-negative strides index exactly as before.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float acc = 0.0;

 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			acc += Cf(&x[ix]) * Cf(&y[iy]);	/* no conjugation */
 		}
 	}
 	pCf(z) = acc;
 }
 /*
  * zdotu_: unconjugated dot product, *z = sum of x_k * y_k over n elements,
  * in double precision.
  *
  *   z            receives the result
  *   n_           number of elements; n <= 0 stores 0
  *   x, incx_     first vector and its stride, in elements
  *   y, incy_     second vector and its stride, in elements
  *
  * A negative stride walks its vector backwards starting at offset
  * (1-n)*inc, the reference BLAS convention, so no element in front of the
  * array is touched.  Non-negative strides index exactly as before.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double acc = 0.0;

 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			acc += Cd(&x[ix]) * Cd(&y[iy]);	/* no conjugation */
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SGELQ2 computes the LQ factorization of a general rectangular matrix using an unblocked */
 /* > algorithm. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGELQ2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgelq2.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgelq2.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgelq2.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGELQ2( M, N, A, LDA, TAU, WORK, INFO ) */

 /*       INTEGER            INFO, LDA, M, N */
 /*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGELQ2 computes an LQ factorization of a real m-by-n matrix A: */
 /* > */
 /* >    A = ( L 0 ) *  Q */
 /* > */
 /* > where: */
 /* > */
 /* >    Q is a n-by-n orthogonal matrix; */
 /* >    L is a lower-triangular m-by-m matrix; */
 /* >    0 is a m-by-(n-m) zero matrix, if m < n. */
 /* > */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the m by n matrix A. */
 /* >          On exit, the elements on and below the diagonal of the array */
 /* >          contain the m by f2cmin(m,n) lower trapezoidal matrix L (L is */
 /* >          lower triangular if m <= n); the elements above the diagonal, */
 /* >          with the array TAU, represent the orthogonal matrix Q as a */
 /* >          product of elementary reflectors (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is REAL array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (M) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2019 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */
 /* >  and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* SGELQ2: unblocked LQ factorization.  Checks M, N and LDA, then for each of */
 /* the min(M,N) rows builds a reflector with slarfg_ and applies it to the */
 /* rows below with slarf_.  Always returns 0; status goes through *info. */
 /* Subroutine */ int sgelq2_(integer *m, integer *n, real *a, integer *lda, 
 	real *tau, real *work, integer *info)
 {
     /* External routines */
     extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, 
 	    integer *, real *, real *, integer *, real *), xerbla_(
 	    char *, integer *, ftnlen), slarfg_(integer *, real *, real *, 
 	    integer *, real *);

     /* Local variables */
     integer ld, j, nrefl, errpos;
     integer veclen, nextcol, nrows, ncols;
     real *diag;
     real saved;


 /*  -- LAPACK computational routine (version 3.9.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
 /*     November 2019 */


 /*  ===================================================================== */

 /*     Arrays are indexed from zero here: row j of this loop is row i = j+1 */
 /*     of the documentation, and A(i,k) lives at a[(i-1) + (k-1)*ld]. */

     ld = *lda;

 /*     Test the input arguments */

     *info = 0;
     if (*m < 0) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
     }
     if (*info != 0) {
 	errpos = -(*info);
 	xerbla_("SGELQ2", &errpos, (ftnlen)6);
 	return 0;
     }

     nrefl = f2cmin(*m,*n);

     for (j = 0; j < nrefl; ++j) {

 /*        Generate elementary reflector H(i) to annihilate A(i,i+1:n) */

 	diag = a + j + j * ld;
 	veclen = *n - j;
 /*        1-based column min(i+1,n): start of the part to be annihilated */
 	nextcol = f2cmin(j + 2,*n);
 	slarfg_(&veclen, diag, a + j + (nextcol - 1) * ld, lda, &tau[j]);

 	if (j + 1 < *m) {

 /*           Apply H(i) to A(i+1:m,i:n) from the right.  The diagonal entry */
 /*           is set to one while the row serves as the reflector vector. */

 	    saved = *diag;
 	    *diag = 1.f;
 	    nrows = *m - (j + 1);
 	    ncols = *n - j;
 	    slarf_("Right", &nrows, &ncols, diag, lda, &tau[j], diag + 1, 
 		    lda, work);
 	    *diag = saved;
 	}
     }
     return 0;

 /*     End of SGELQ2 */

 } /* sgelq2_ */

--- a/lapack-netlib/SRC/sgelqf.c
+++ b/lapack-netlib/SRC/sgelqf.c
@@ -0,0 +1,698 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Basic scalar types used by the f2c-translated code in this file. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex values are stored as a struct with a real member r and an imaginary member i. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd: build a C99 complex value from the r and i members of *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd: return z cast to a pointer to the matching C99 complex type. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf/pCd: dereference that cast pointer, giving an lvalue the macros can assign to. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Fortran LOGICAL and small-integer counterparts. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer aliases used by the I/O descriptor structs below; ftnlen is also */
 /* the type of the trailing string-length arguments passed to xerbla_ and */
 /* ilaenv_ further down in this file. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Most char* members are paired with an ftnlen/ftnint length member. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* Union with one member per f2c scalar type (the longint member is commented out). */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of a single variable: name, address, dimension list and a type code. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Generic arithmetic helpers. They are function-like macros, so an argument */
 /* with side effects may be evaluated more than once. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 /* f2cmin/f2cmax: smaller/larger of a and b; a is chosen when they compare equal. */
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit helpers: test, clear or set bit number b of a. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro forms of the f2c intrinsic helpers. The macros taking x, a, b, z, R */
 /* or Z expect pointers and dereference them; the u_* macros take plain values. */
 /* Macros expanding to a { ... } block are statements, not expressions. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 /* Complex helpers: Cf/Cd read the operand, pCf/pCd store the result. */
 #define c_abs(z) (cabsf(Cf(z)))
 /* NOTE(review): c_cos calls the double-precision ccos while its neighbours use the *f functions. */
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 /* Real helpers operating on *(x). */
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 /* NOTE(review): r_cnjg and r_imag use the double-precision conj/cimag on a float complex. */
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* d_dim: positive difference, *a - *b when *a > *b and 0.0 otherwise. */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* d_int/r_int: truncate toward zero using floor on the magnitude. */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* d_lg10/r_lg10: natural log scaled by the constant 0.43429448190325182765. */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* u_nint: round to nearest, with halves rounded away from zero. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* u_sign: magnitude of a, negated when b is negative. */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 /* Integer helpers; results are cast to integer where a floating helper is reused. */
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Power helpers. All arguments are pointers. The *_ui functions take the */
 /* base and an integer exponent by value; pow_zi, pow_ci and pow_zz are */
 /* statements that store their result through the first argument. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* The exponent is converted with Cd() like the base: cpow takes two C99 */
 /* complex values, not a doublecomplex struct. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 /* s_cat: copy the *np source strings rpp[i] (lengths rnp[i]) one after another */
 /* into lpp, writing at most llp characters and filling what is left with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the shorter of the two lengths c and d. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copy up to min(C,D) characters of B into A, stopping early at a NUL in B. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* sig_die and s_stop ignore their arguments and exit with status 1 and 0 respectively. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 /* Double-precision complex helpers; z_exp and z_sqrt store through pCd. */
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* myexit_/mycycle expand to a complete break/continue statement. */
 #define myexit_() break;
 #define mycycle() continue;
 /* The value macros expand to parenthesized expressions so they can be used */
 /* inside larger expressions as well as in initializers. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 /* mymaxloc: s and e are pointers and are dereferenced; w and n are passed through. */
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning logical. The parameter list is left */
 /* unspecified: (...) when compiled as C++, () when compiled as C. */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Returns x raised to the integer power n, by binary exponentiation. */
 /* n == 0 yields 1; a negative n raises 1/x to the power |n|. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: -n overflows for the most negative n. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* Square the base only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Returns x raised to the integer power n, by binary exponentiation. */
 /* n == 0 yields 1; a negative n raises 1/x to the power |n|. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: -n overflows for the most negative n. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* Square the base only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Returns the complex x raised to the integer power n, by binary */
 /* exponentiation. n == 0 yields 1; a negative n raises 1/x to the power |n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: -n overflows for the most negative n. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* Square the base only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Returns the complex x raised to the integer power n, by binary */
 /* exponentiation. n == 0 yields 1; a negative n raises 1/x to the power |n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: -n overflows for the most negative n. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* Square the base only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Returns the integer power x**n. */
 /*   n == 0 or x == 1     -> 1 */
 /*   n < 0 and x == -1    -> +1 or -1 according to the parity of |n| */
 /*   n < 0 and x == 0     -> evaluates 1/x, an integer division by zero */
 /*   n < 0, any other x   -> 0 (the exact result truncates to zero) */
 /*   n > 0                -> binary exponentiation */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int bits;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* x == -1: take |n| in unsigned arithmetic, since -n overflows */
 		/* for the most negative n. */
 		bits = 0UL - (unsigned long int)n;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* Square the base only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Scans the 1-based positions s..e of w and returns the position of the */
 /* first largest element, counted from s (so s itself gives 1). */
 /* The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s, pos;
 	double top = w[s-1];
 	for (pos = s + 1; pos <= e; ++pos) {
 		/* Strict comparison keeps the earliest position on ties. */
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Scans the 1-based positions s..e of w and returns the position of the */
 /* first largest element, counted from s (so s itself gives 1). */
 /* The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s, pos;
 	float top = w[s-1];
 	for (pos = s + 1; pos <= e; ++pos) {
 		/* Strict comparison keeps the earliest position on ties. */
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: stores sum over i of conj(x(i)) * y(i), for n */
 /* elements taken with strides *incx_ and *incy_, into *z. n <= 0 stores 0. */
 /* For a negative stride the walk starts at offset (1-n)*inc and moves toward */
 /* offset 0, as the reference BLAS does, so no element before x[0] or y[0] */
 /* is read. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product: stores sum over i of conj(x(i)) * y(i), for n */
 /* elements taken with strides *incx_ and *incy_, into *z. n <= 0 stores 0. */
 /* For a negative stride the walk starts at offset (1-n)*inc and moves toward */
 /* offset 0, as the reference BLAS does, so no element before x[0] or y[0] */
 /* is read. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }	
 /* Unconjugated dot product: stores sum over i of x(i) * y(i), for n */
 /* elements taken with strides *incx_ and *incy_, into *z. n <= 0 stores 0. */
 /* For a negative stride the walk starts at offset (1-n)*inc and moves toward */
 /* offset 0, as the reference BLAS does, so no element before x[0] or y[0] */
 /* is read. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 			zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Unconjugated dot product: stores sum over i of x(i) * y(i), for n */
 /* elements taken with strides *incx_ and *incy_, into *z. n <= 0 stores 0. */
 /* For a negative stride the walk starts at offset (1-n)*inc and moves toward */
 /* offset 0, as the reference BLAS does, so no element before x[0] or y[0] */
 /* is read. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 			zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* Integer constants kept in storage so their addresses can be passed to */
 /* ilaenv_, which takes its integer arguments by pointer. */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;

 /* > \brief \b SGELQF */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGELQF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgelqf.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgelqf.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgelqf.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */

 /*       INTEGER            INFO, LDA, LWORK, M, N */
 /*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGELQF computes an LQ factorization of a real M-by-N matrix A: */
 /* > */
 /* >    A = ( L 0 ) *  Q */
 /* > */
 /* > where: */
 /* > */
 /* >    Q is a N-by-N orthogonal matrix; */
 /* >    L is a lower-triangular M-by-M matrix; */
 /* >    0 is a M-by-(N-M) zero matrix, if M < N. */
 /* > */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and below the diagonal of the array */
 /* >          contain the m-by-min(m,n) lower trapezoidal matrix L (L is */
 /* >          lower triangular if m <= n); the elements above the diagonal, */
 /* >          with the array TAU, represent the orthogonal matrix Q as a */
 /* >          product of elementary reflectors (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is REAL array, dimension (min(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK.  LWORK >= max(1,M). */
 /* >          For optimum performance LWORK >= M*NB, where NB is the */
 /* >          optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2019 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(k) . . . H(2) H(1), where k = min(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */
 /* >  and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Computes the LQ factorization A = ( L 0 ) * Q of the M-by-N matrix A. */
 /* Row blocks of NB rows are factored with sgelq2_; for each block the */
 /* triangular factor of its block reflector is formed by slarft_ and applied */
 /* to the rows below by slarfb_. Whatever is left is factored by one final */
 /* unblocked sgelq2_ call. */
 /* INFO is set to -1, -2, -4 or -7 (and xerbla_ is called) when M, N, LDA or */
 /* LWORK is invalid. With LWORK = -1 only WORK(1) = max(1, M*NB) is set, so */
 /* the reported size is always an LWORK this routine accepts, M = 0 included. */
 /* The function itself always returns 0. */
 /* Subroutine */ int sgelqf_(integer *m, integer *n, real *a, integer *lda, 
 	real *tau, real *work, integer *lwork, integer *info)
 {
     /* System generated locals */
     integer a_dim1, a_offset, i__1, i__2, i__3, i__4;

     /* Local variables */
     integer i__, k, nbmin, iinfo;
     extern /* Subroutine */ int sgelq2_(integer *, integer *, real *, integer 
 	    *, real *, real *, integer *);
     integer ib, nb, nx;
     extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, real *, integer *, real *, 
 	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen);
     extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
     extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, 
 	    real *, integer *, real *, real *, integer *);
     integer ldwork, lwkopt;
     logical lquery;
     integer iws;


 /*  -- LAPACK computational routine (version 3.9.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2019 */


 /*  ===================================================================== */


 /*     Test the input arguments */

     /* Parameter adjustments: shift the pointers so that the Fortran-style */
     /* 1-based subscripts a[i + j*a_dim1], tau[i] and work[i] can be used. */
     a_dim1 = *lda;
     a_offset = 1 + a_dim1 * 1;
     a -= a_offset;
     --tau;
     --work;

     /* Function Body */
     *info = 0;
     nb = ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
 	    1);

 /*     Report at least 1: LWORK must be >= max(1,M), so a query result of 0 */
 /*     (M = 0) would be rejected if it were passed back as LWORK. */

 /* Computing MAX */
     i__1 = 1, i__2 = *m * nb;
     lwkopt = f2cmax(i__1,i__2);
     work[1] = (real) lwkopt;
     lquery = *lwork == -1;
     if (*m < 0) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
     } else if (*lwork < f2cmax(1,*m) && ! lquery) {
 	*info = -7;
     }
     if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("SGELQF", &i__1, (ftnlen)6);
 	return 0;
     } else if (lquery) {
 	return 0;
     }

 /*     Quick return if possible */

     k = f2cmin(*m,*n);
     if (k == 0) {
 	work[1] = 1.f;
 	return 0;
     }

     nbmin = 2;
     nx = 0;
     iws = *m;
     if (nb > 1 && nb < k) {

 /*        Determine when to cross over from blocked to unblocked code. */

 /* Computing MAX */
 	i__1 = 0, i__2 = ilaenv_(&c__3, "SGELQF", " ", m, n, &c_n1, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);
 	if (nx < k) {

 /*           Determine if workspace is large enough for blocked code. */

 	    ldwork = *m;
 	    iws = ldwork * nb;
 	    if (*lwork < iws) {

 /*              Not enough workspace to use optimal NB:  reduce NB and */
 /*              determine the minimum value of NB. */

 		nb = *lwork / ldwork;
 /* Computing MAX */
 		i__1 = 2, i__2 = ilaenv_(&c__2, "SGELQF", " ", m, n, &c_n1, &
 			c_n1, (ftnlen)6, (ftnlen)1);
 		nbmin = f2cmax(i__1,i__2);
 	    }
 	}
     }

     if (nb >= nbmin && nb < k && nx < k) {

 /*        Use blocked code initially. NB >= NBMIN >= 2 here, so the loop */
 /*        always steps upward; i__ keeps its value after the loop and */
 /*        marks where the unblocked code below has to continue. */

 	i__1 = k - nx;
 	for (i__ = 1; i__ <= i__1; i__ += nb) {
 /* Computing MIN */
 	    i__3 = k - i__ + 1;
 	    ib = f2cmin(i__3,nb);

 /*           Compute the LQ factorization of the current block */
 /*           A(i:i+ib-1,i:n) */

 	    i__3 = *n - i__ + 1;
 	    sgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
 		    1], &iinfo);
 	    if (i__ + ib <= *m) {

 /*              Form the triangular factor of the block reflector */
 /*              H = H(i) H(i+1) . . . H(i+ib-1) */

 		i__3 = *n - i__ + 1;
 		slarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * 
 			a_dim1], lda, &tau[i__], &work[1], &ldwork);

 /*              Apply H to A(i+ib:m,i:n) from the right */

 		i__3 = *m - i__ - ib + 1;
 		i__4 = *n - i__ + 1;
 		slarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, 
 			&i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
 			ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + 
 			1], &ldwork);
 	    }
 /* L10: */
 	}
     } else {
 	i__ = 1;
     }

 /*     Use unblocked code to factor the last or only block. */

     if (i__ <= k) {
 	i__2 = *m - i__ + 1;
 	i__1 = *n - i__ + 1;
 	sgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
 		, &iinfo);
     }

     work[1] = (real) iws;
     return 0;

 /*     End of SGELQF */

 } /* sgelqf_ */

--- a/lapack-netlib/SRC/sgelqt.c
+++ b/lapack-netlib/SRC/sgelqt.c
@@ -0,0 +1,601 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Basic scalar types used by f2c-translated code. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex values are stored as a struct with a real member r and an imaginary member i. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd: build a C99 complex value from the r and i members of *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd: return z cast to a pointer to the matching C99 complex type. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf/pCd: dereference that cast pointer, giving an lvalue the macros can assign to. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Fortran LOGICAL and small-integer counterparts. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer aliases used by the I/O descriptor structs below. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Most char* members are paired with an ftnlen/ftnint length member. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* Union with one member per f2c scalar type (the longint member is commented out). */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of a single variable: name, address, dimension list and a type code. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Generic arithmetic helpers. They are function-like macros, so an argument */
 /* with side effects may be evaluated more than once. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 /* f2cmin/f2cmax: smaller/larger of a and b; a is chosen when they compare equal. */
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit helpers: test, clear or set bit number b of a. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro forms of the f2c intrinsic helpers. The macros taking x, a, b, z, R */
 /* or Z expect pointers and dereference them; the u_* macros take plain values. */
 /* Macros expanding to a { ... } block are statements, not expressions. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 /* Complex helpers: Cf/Cd read the operand, pCf/pCd store the result. */
 #define c_abs(z) (cabsf(Cf(z)))
 /* NOTE(review): c_cos calls the double-precision ccos while its neighbours use the *f functions. */
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 /* Real helpers operating on *(x). */
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 /* NOTE(review): r_cnjg and r_imag use the double-precision conj/cimag on a float complex. */
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* d_dim: positive difference, *a - *b when *a > *b and 0.0 otherwise. */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* d_int/r_int: truncate toward zero using floor on the magnitude. */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* d_lg10/r_lg10: natural log scaled by the constant 0.43429448190325182765. */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* u_nint: round to nearest, with halves rounded away from zero. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* u_sign: magnitude of a, negated when b is negative. */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 /* Integer helpers; results are cast to integer where a floating helper is reused. */
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Power helpers. All arguments are pointers. The *_ui functions take the */
 /* base and an integer exponent by value; pow_zi, pow_ci and pow_zz are */
 /* statements that store their result through the first argument. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* The exponent is converted with Cd() like the base: cpow takes two C99 */
 /* complex values, not a doublecomplex struct. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 /* s_cat: copy the *np source strings rpp[i] (lengths rnp[i]) one after another */
 /* into lpp, writing at most llp characters and filling what is left with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the shorter of the two lengths c and d. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copy up to min(C,D) characters of B into A, stopping early at a NUL in B. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* sig_die and s_stop ignore their arguments and exit with status 1 and 0 respectively. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 /* Double-precision complex helpers; z_exp and z_sqrt store through pCd. */
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* myexit_/mycycle expand to a complete break/continue statement. */
 #define myexit_() break;
 #define mycycle() continue;
 /* The value macros expand to parenthesized expressions so they can be used */
 /* inside larger expressions as well as in initializers. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 /* mymaxloc: s and e are pointers and are dereferenced; w and n are passed through. */
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning logical. The parameter list is left */
 /* unspecified: (...) when compiled as C++, () when compiled as C. */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Returns x raised to the integer power n, by binary exponentiation. */
 /* n == 0 yields 1; a negative n raises 1/x to the power |n|. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: -n overflows for the most negative n. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* Square the base only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Returns x raised to the integer power n, by binary exponentiation. */
 /* n == 0 yields 1; a negative n raises 1/x to the power |n|. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: -n overflows for the most negative n. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* Square the base only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Returns the complex x raised to the integer power n, by binary */
 /* exponentiation. n == 0 yields 1; a negative n raises 1/x to the power |n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: -n overflows for the most negative n. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* Square the base only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Returns the complex x raised to the integer power n, by binary */
 /* exponentiation. n == 0 yields 1; a negative n raises 1/x to the power |n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: -n overflows for the most negative n. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* Square the base only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Returns the integer power x**n. */
 /*   n == 0 or x == 1     -> 1 */
 /*   n < 0 and x == -1    -> +1 or -1 according to the parity of |n| */
 /*   n < 0 and x == 0     -> evaluates 1/x, an integer division by zero */
 /*   n < 0, any other x   -> 0 (the exact result truncates to zero) */
 /*   n > 0                -> binary exponentiation */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int bits;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* x == -1: take |n| in unsigned arithmetic, since -n overflows */
 		/* for the most negative n. */
 		bits = 0UL - (unsigned long int)n;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* Square the base only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Scans the 1-based positions s..e of w and returns the position of the */
 /* first largest element, counted from s (so s itself gives 1). */
 /* The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s, pos;
 	double top = w[s-1];
 	for (pos = s + 1; pos <= e; ++pos) {
 		/* Strict comparison keeps the earliest position on ties. */
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Scans the 1-based positions s..e of w and returns the position of the */
 /* first largest element, counted from s (so s itself gives 1). */
 /* The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s, pos;
 	float top = w[s-1];
 	for (pos = s + 1; pos <= e; ++pos) {
 		/* Strict comparison keeps the earliest position on ties. */
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: stores sum over i of conj(x(i)) * y(i), for n */
 /* elements taken with strides *incx_ and *incy_, into *z. n <= 0 stores 0. */
 /* For a negative stride the walk starts at offset (1-n)*inc and moves toward */
 /* offset 0, as the reference BLAS does, so no element before x[0] or y[0] */
 /* is read. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product: stores sum over i of conj(x(i)) * y(i), for n */
 /* elements taken with strides *incx_ and *incy_, into *z. n <= 0 stores 0. */
 /* For a negative stride the walk starts at offset (1-n)*inc and moves toward */
 /* offset 0, as the reference BLAS does, so no element before x[0] or y[0] */
 /* is read. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }	
 /* Unconjugated dot product: stores sum over i of x(i) * y(i), for n */
 /* elements taken with strides *incx_ and *incy_, into *z. n <= 0 stores 0. */
 /* For a negative stride the walk starts at offset (1-n)*inc and moves toward */
 /* offset 0, as the reference BLAS does, so no element before x[0] or y[0] */
 /* is read. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 			zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Unconjugated dot product: stores sum over i of x(i) * y(i), for n */
 /* elements taken with strides *incx_ and *incy_, into *z. n <= 0 stores 0. */
 /* For a negative stride the walk starts at offset (1-n)*inc and moves toward */
 /* offset 0, as the reference BLAS does, so no element before x[0] or y[0] */
 /* is read. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 			zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SGELQT */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) */

 /*       INTEGER   INFO, LDA, LDT, M, N, MB */
 /*       REAL      A( LDA, * ), T( LDT, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGELQT computes a blocked LQ factorization of a real M-by-N matrix A */
 /* > using the compact WY representation of Q. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] MB */
 /* > \verbatim */
 /* >          MB is INTEGER */
 /* >          The block size to be used in the blocked LQ.  MIN(M,N) >= MB >= 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and below the diagonal of the array */
 /* >          contain the M-by-MIN(M,N) lower trapezoidal matrix L (L is */
 /* >          lower triangular if M <= N); the elements above the diagonal */
 /* >          are the rows of V. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is REAL array, dimension (LDT,MIN(M,N)) */
 /* >          The upper triangular block reflectors stored in compact form */
 /* >          as a sequence of upper triangular blocks.  See below */
 /* >          for further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= MB. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (MB*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix V stores the elementary reflectors H(i) in the i-th row */
 /* >  above the diagonal. For example, if M=3 and N=5, the matrix V is */
 /* > */
 /* >               V = (  1  v1 v1 v1 v1 ) */
 /* >                   (     1  v2 v2 v2 ) */
 /* >                   (         1 v3 v3 ) */
 /* > */
 /* > */
 /* >  where the vi's represent the vectors which define H(i), which are returned */
 /* >  in the matrix A.  The 1's along the diagonal of V are not stored in A. */
 /* >  Let K=MIN(M,N).  The number of blocks is B = ceiling(K/MB), where each */
 /* >  block is of order MB except for the last block, which is of order */
 /* >  IB = K - (B-1)*MB.  For each of the B blocks, an upper triangular block */
 /* >  reflector factor is computed: T1, T2, ..., TB.  The MB-by-MB (and IB-by-IB */
 /* >  for the last block) T's are stored in the MB-by-K matrix T as */
 /* > */
 /* >               T = (T1 T2 ... TB). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* SGELQT driver: validates the arguments, then walks the leading MIN(M,N) rows of A in */
 /* panels of at most MB rows.  Each panel is factored by SGELQT3 and, when rows remain */
 /* below the panel, SLARFB is called to update them.  Always returns 0; argument errors */
 /* are reported through *info and XERBLA. */
 /* Subroutine */ int sgelqt_(integer *m, integer *n, integer *mb, real *a, 
 	integer *lda, real *t, integer *ldt, real *work, integer *info)
 {
     /* Routines defined in other translation units */
     extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, real *, integer *, real *, 
 	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen), sgelqt3_(
 	    integer *, integer *, real *, integer *, real *, integer *, 
 	    integer *);

     /* Leading dimensions and block step are read once on entry */
     const integer lda_v = *lda;
     const integer ldt_v = *ldt;
     const integer step = *mb;

     integer start, kmin, ib, iinfo;
     integer errpos, panel_cols, rest_rows, upd_cols, ldwork;
     real *panel, *tpanel;

 /*     Test the input arguments */

     *info = 0;
     if (*m < 0) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*mb < 1 || (*mb > f2cmin(*m,*n) && f2cmin(*m,*n) > 0)) {
 	*info = -3;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -5;
     } else if (*ldt < *mb) {
 	*info = -7;
     }
     if (*info != 0) {
 	errpos = -(*info);
 	xerbla_("SGELQT", &errpos, (ftnlen)6);
 	return 0;
     }

 /*     Quick return if possible */

     kmin = f2cmin(*m,*n);
     if (kmin == 0) {
 	return 0;
     }

 /*     Blocked loop of length K; START is the 1-based first row/column of the panel */

     for (start = 1; start <= kmin; start += step) {
 	ib = f2cmin(kmin - start + 1,*mb);

 	/* 0-based addresses of A(START,START) and T(1,START) */
 	panel = a + (start - 1) + (start - 1) * lda_v;
 	tpanel = t + (start - 1) * ldt_v;

 /*     Compute the LQ factorization of the current panel A(I:I+IB-1,I:N). */
 /*     The status returned in IINFO is not examined. */

 	panel_cols = *n - start + 1;
 	sgelqt3_(&ib, &panel_cols, panel, lda, tpanel, ldt, &iinfo);

 	if (start + ib <= *m) {

 /*     Update the rows below the panel, A(I+IB:M,I:N), from the right; */
 /*     WORK is passed with leading dimension M-I-IB+1 */

 	    rest_rows = *m - start - ib + 1;
 	    upd_cols = *n - start + 1;
 	    ldwork = *m - start - ib + 1;
 	    slarfb_("R", "N", "F", "R", &rest_rows, &upd_cols, &ib, panel, 
 		    lda, tpanel, ldt, panel + ib, lda, work, &ldwork);
 	}
     }
     return 0;

 /*     End of SGELQT */

 } /* sgelqt_ */

--- a/lapack-netlib/SRC/sgelqt3.c
+++ b/lapack-netlib/SRC/sgelqt3.c
@@ -0,0 +1,657 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Raise x to the integer power n by repeated squaring; a negative n inverts x first. */
 static float spow_ui(float x, integer n) {
 	float acc = 1.0;
 	unsigned long int bits;
 	if (n == 0) {
 		return acc;
 	}
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	for (;;) {
 		if (bits & 01) {
 			acc *= x;	/* low bit set: fold the current power of x in */
 		}
 		bits >>= 1;
 		if (bits == 0) {
 			break;
 		}
 		x *= x;
 	}
 	return acc;
 }
 /* Raise x to the integer power n by repeated squaring; a negative n inverts x first. */
 static double dpow_ui(double x, integer n) {
 	double acc = 1.0;
 	unsigned long int bits;
 	if (n == 0) {
 		return acc;
 	}
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	for (;;) {
 		if (bits & 01) {
 			acc *= x;	/* low bit set: fold the current power of x in */
 		}
 		bits >>= 1;
 		if (bits == 0) {
 			break;
 		}
 		x *= x;
 	}
 	return acc;
 }
 /* Raise the complex value x to the integer power n by repeated squaring; a negative n inverts x first. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float acc = 1.0;
 	unsigned long int bits;
 	if (n == 0) {
 		return acc;
 	}
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	for (;;) {
 		if (bits & 01) {
 			acc *= x;	/* low bit set: fold the current power of x in */
 		}
 		bits >>= 1;
 		if (bits == 0) {
 			break;
 		}
 		x *= x;
 	}
 	return acc;
 }
 /* Raise the complex value x to the integer power n by repeated squaring; a negative n inverts x first. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double acc = 1.0;
 	unsigned long int bits;
 	if (n == 0) {
 		return acc;
 	}
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	for (;;) {
 		if (bits & 01) {
 			acc *= x;	/* low bit set: fold the current power of x in */
 		}
 		bits >>= 1;
 		if (bits == 0) {
 			break;
 		}
 		x *= x;
 	}
 	return acc;
 }
 /* Integer power x**n.  For n <= 0 the result is 1 when n == 0 or x == 1, and 0 for any */
 /* other x except -1 and 0; x == 0 evaluates 1/x (an integer division by zero), and */
 /* x == -1 falls through to the squaring loop with the exponent negated. */
 static integer pow_ii(integer x, integer n) {
 	integer acc;
 	unsigned long int bits;
 	if (n <= 0) {
 		if (n == 0 || x == 1) {
 			return 1;
 		}
 		if (x != -1) {
 			return x == 0 ? 1/x : 0;
 		}
 		n = -n;
 	}
 	bits = n;
 	acc = 1;
 	for (;;) {
 		if (bits & 01) {
 			acc *= x;
 		}
 		bits >>= 1;
 		if (bits == 0) {
 			break;
 		}
 		x *= x;
 	}
 	return acc;
 }
 /* Scan w[s-1] .. w[e-1] and return the position, counted from 1 at index s, of the */
 /* first entry that is strictly greater than all earlier ones.  n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s, k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Scan w[s-1] .. w[e-1] and return the position, counted from 1 at index s, of the */
 /* first entry that is strictly greater than all earlier ones.  n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s, k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum over i of conj(x(i)) * y(i), taken over n elements with strides incx and incy. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* As in reference BLAS, a negative stride traverses the vector backwards from its far end, so the offset never drops below zero. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product: *z = sum over i of conj(x(i)) * y(i), taken over n elements with strides incx and incy. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* As in reference BLAS, a negative stride traverses the vector backwards from its far end, so the offset never drops below zero. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over i of x(i) * y(i), taken over n elements with strides incx and incy. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* As in reference BLAS, a negative stride traverses the vector backwards from its far end, so the offset never drops below zero. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over i of x(i) * y(i), taken over n elements with strides incx and incy. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* As in reference BLAS, a negative stride traverses the vector backwards from its far end, so the offset never drops below zero. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static real c_b7 = 1.f;
 static real c_b19 = -1.f;

 /* > \brief \b SGELQT3 */

 /*  Definition: */
 /*  =========== */

 /*        SUBROUTINE SGELQT3( M, N, A, LDA, T, LDT, INFO ) */

 /*       INTEGER   INFO, LDA, M, N, LDT */
 /*       REAL   A( LDA, * ), T( LDT, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGELQT3 recursively computes a LQ factorization of a real M-by-N */
 /* > matrix A, using the compact WY representation of Q. */
 /* > */
 /* > Based on the algorithm of Elmroth and Gustavson, */
 /* > IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M =< N. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the real M-by-N matrix A.  On exit, the elements on and */
 /* >          below the diagonal contain the M-by-M lower triangular matrix L; the */
 /* >          elements above the diagonal are the rows of V.  See below for */
 /* >          further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is REAL array, dimension (LDT,M) */
 /* >          The M-by-M upper triangular factor of the block reflector. */
 /* >          The elements on and above the diagonal contain the block */
 /* >          reflector T; the elements below the diagonal are not used. */
 /* >          See below for further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix V stores the elementary reflectors H(i) in the i-th row */
 /* >  above the diagonal. For example, if M=3 and N=5, the matrix V is */
 /* > */
 /* >               V = (  1  v1 v1 v1 v1 ) */
 /* >                   (     1  v2 v2 v2 ) */
 /* >                   (         1 v3 v3 ) */
 /* > */
 /* > */
 /* >  where the vi's represent the vectors which define H(i), which are returned */
 /* >  in the matrix A.  The 1's along the diagonal of V are not stored in A.  The */
 /* >  block reflector H is then given by */
 /* > */
 /* >               H = I - V * T * V**T */
 /* > */
 /* >  where V**T is the transpose of V. */
 /* > */
 /* >  For details of the algorithm, see Elmroth and Gustavson (cited above). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Recursive LQ factorization kernel.  Validates the arguments, returns at once for an */
 /* empty problem (M = 0), handles a single row with SLARFG, and otherwise splits the M */
 /* rows into a top part of M1 = M/2 rows and a bottom part of M2 = M - M1 rows, recursing */
 /* on each and combining the results with STRMM/SGEMM.  Always returns 0; argument */
 /* errors are reported through *info and XERBLA.  The IINFO values returned by the */
 /* recursive calls are not examined. */
 /* Subroutine */ int sgelqt3_(integer *m, integer *n, real *a, integer *lda, 
 	real *t, integer *ldt, integer *info)
 {
     /* System generated locals */
     integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2;

     /* Local variables */
     integer i__, j, iinfo;
     extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
 	    integer *, real *, real *, integer *, real *, integer *, real *, 
 	    real *, integer *);
     integer i1, j1, m1, m2;
     extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
 	    integer *, integer *, real *, real *, integer *, real *, integer *
 	    ), xerbla_(char *, integer *, ftnlen), slarfg_(integer *, real *, real *, integer *, real *);


 /*  -- LAPACK computational routine (version 3.8.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2017 */


 /*  ===================================================================== */


     /* Parameter adjustments: shift A and T so they can be indexed 1-based */
     /* as a[row + col * a_dim1] and t[row + col * t_dim1] */
     a_dim1 = *lda;
     a_offset = 1 + a_dim1 * 1;
     a -= a_offset;
     t_dim1 = *ldt;
     t_offset = 1 + t_dim1 * 1;
     t -= t_offset;

     /* Function Body */
     *info = 0;
     if (*m < 0) {
 	*info = -1;
     } else if (*n < *m) {
 	*info = -2;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
     } else if (*ldt < f2cmax(1,*m)) {
 	*info = -6;
     }
     if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("SGELQT3", &i__1, (ftnlen)7);
 	return 0;
     }

 /*     Quick return for an empty problem.  Without it M = 0 passes the */
 /*     checks above and the split below (M1 = 0) would call this routine */
 /*     again with M = 0, recursing without end. */

     if (*m == 0) {
 	return 0;
     }

     if (*m == 1) {

 /*        Compute Householder transform when M=1 */

 	slarfg_(n, &a[a_offset], &a[f2cmin(2,*n) * a_dim1 + 1], lda, &t[t_offset]
 		);

     } else {

 /*        Otherwise, split A into blocks... */

 	m1 = *m / 2;
 	m2 = *m - m1;
 /* Computing MIN */
 	i__1 = m1 + 1;
 	i1 = f2cmin(i__1,*m);
 /* Computing MIN */
 	i__1 = *m + 1;
 	j1 = f2cmin(i__1,*n);

 /*        Factor the top M1 rows, A(1:M1,1:N); the result goes to T(1:M1,1:M1) */

 	sgelqt3_(&m1, n, &a[a_offset], lda, &t[t_offset], ldt, &iinfo);

 /*        Update the bottom M2 rows with the reflectors just computed. */
 /*        T(I1:M,1:M1) serves as workspace: start from a copy of A(I1:M,1:M1) */

 	i__1 = m2;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = m1;
 	    for (j = 1; j <= i__2; ++j) {
 		t[i__ + m1 + j * t_dim1] = a[i__ + m1 + j * a_dim1];
 	    }
 	}
 	strmm_("R", "U", "T", "U", &m2, &m1, &c_b7, &a[a_offset], lda, &t[i1 
 		+ t_dim1], ldt);

 	i__1 = *n - m1;
 	sgemm_("N", "T", &m2, &m1, &i__1, &c_b7, &a[i1 + i1 * a_dim1], lda, &
 		a[i1 * a_dim1 + 1], lda, &c_b7, &t[i1 + t_dim1], ldt);

 	strmm_("R", "U", "N", "N", &m2, &m1, &c_b7, &t[t_offset], ldt, &t[i1 
 		+ t_dim1], ldt);

 	i__1 = *n - m1;
 	sgemm_("N", "N", &m2, &i__1, &m1, &c_b19, &t[i1 + t_dim1], ldt, &a[i1 
 		* a_dim1 + 1], lda, &c_b7, &a[i1 + i1 * a_dim1], lda);

 	strmm_("R", "U", "N", "U", &m2, &m1, &c_b7, &a[a_offset], lda, &t[i1 
 		+ t_dim1], ldt);

 /*        Subtract the workspace from A(I1:M,1:M1) and clear it again */

 	i__1 = m2;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = m1;
 	    for (j = 1; j <= i__2; ++j) {
 		a[i__ + m1 + j * a_dim1] -= t[i__ + m1 + j * t_dim1];
 		t[i__ + m1 + j * t_dim1] = 0.f;
 	    }
 	}

 /*        Factor the bottom block A(I1:M,I1:N); the result goes to T(I1:M,I1:M) */

 	i__1 = *n - m1;
 	sgelqt3_(&m2, &i__1, &a[i1 + i1 * a_dim1], lda, &t[i1 + i1 * t_dim1], 
 		ldt, &iinfo);

 /*        Build the coupling block T3 = T(1:M1,I1:M), starting from a copy */
 /*        of A(1:M1,I1:M) */

 	i__1 = m2;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    i__2 = m1;
 	    for (j = 1; j <= i__2; ++j) {
 		t[j + (i__ + m1) * t_dim1] = a[j + (i__ + m1) * a_dim1];
 	    }
 	}

 	strmm_("R", "U", "T", "U", &m1, &m2, &c_b7, &a[i1 + i1 * a_dim1], lda,
 		 &t[i1 * t_dim1 + 1], ldt);

 	i__1 = *n - *m;
 	sgemm_("N", "T", &m1, &m2, &i__1, &c_b7, &a[j1 * a_dim1 + 1], lda, &a[
 		i1 + j1 * a_dim1], lda, &c_b7, &t[i1 * t_dim1 + 1], ldt);

 	strmm_("L", "U", "N", "N", &m1, &m2, &c_b19, &t[t_offset], ldt, &t[i1 
 		* t_dim1 + 1], ldt);

 	strmm_("R", "U", "N", "N", &m1, &m2, &c_b7, &t[i1 + i1 * t_dim1], ldt,
 		 &t[i1 * t_dim1 + 1], ldt);



 /*        Y = (Y1,Y2); L = [ L1            0  ];  T = [T1 T3] */
 /*                         [ A(1:N1,J1:N)  L2 ]       [ 0 T2] */

     }

     return 0;

 /*     End of SGELQT3 */

 } /* sgelqt3_ */

--- a/lapack-netlib/SRC/sgels.c
+++ b/lapack-netlib/SRC/sgels.c
@@ -0,0 +1,954 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Integer power of a float: x**n via binary exponentiation; for n < 0 the base is replaced by 1/x. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int e;
 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	e = n;
 	while (1) {
 		if (e & 01) {
 			result *= x;
 		}
 		e >>= 1;
 		if (!e) {
 			break;
 		}
 		x *= x;	/* next bit corresponds to the squared base */
 	}
 	return result;
 }
 /* Integer power of a double: x**n via binary exponentiation; for n < 0 the base is replaced by 1/x. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int e;
 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	e = n;
 	while (1) {
 		if (e & 01) {
 			result *= x;
 		}
 		e >>= 1;
 		if (!e) {
 			break;
 		}
 		x *= x;	/* next bit corresponds to the squared base */
 	}
 	return result;
 }
 /* Integer power of a complex float: x**n via binary exponentiation; for n < 0 the base is replaced by 1/x. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int e;
 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	e = n;
 	while (1) {
 		if (e & 01) {
 			result *= x;
 		}
 		e >>= 1;
 		if (!e) {
 			break;
 		}
 		x *= x;	/* next bit corresponds to the squared base */
 	}
 	return result;
 }
 /* Integer power of a complex double: x**n via binary exponentiation; for n < 0 the base is replaced by 1/x. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int e;
 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	e = n;
 	while (1) {
 		if (e & 01) {
 			result *= x;
 		}
 		e >>= 1;
 		if (!e) {
 			break;
 		}
 		x *= x;	/* next bit corresponds to the squared base */
 	}
 	return result;
 }
 /* Integer power x**n.  For n <= 0 the result is 1 when n == 0 or x == 1, and 0 for any */
 /* other x except -1 and 0; x == 0 evaluates 1/x (an integer division by zero), and */
 /* x == -1 falls through to the squaring loop with the exponent negated. */
 static integer pow_ii(integer x, integer n) {
 	integer result;
 	unsigned long int e;
 	if (n <= 0) {
 		if (n == 0 || x == 1) {
 			return 1;
 		}
 		if (x != -1) {
 			return x == 0 ? 1/x : 0;
 		}
 		n = -n;
 	}
 	e = n;
 	result = 1;
 	while (1) {
 		if (e & 01) {
 			result *= x;
 		}
 		e >>= 1;
 		if (!e) {
 			break;
 		}
 		x *= x;
 	}
 	return result;
 }
 /* Position of the first largest entry among w[s-1] .. w[e-1], reported
    1-based relative to s.  w[s-1] is always read, even if e < s.
    The parameter n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double top = w[s-1];
 	integer k = s + 1;
 	while (k <= e) {
 		/* strict comparison: ties keep the earliest position */
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Position of the first largest entry among w[s-1] .. w[e-1], reported
    1-based relative to s.  w[s-1] is always read, even if e < s.
    The parameter n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float top = w[s-1];
 	integer k = s + 1;
 	while (k <= e) {
 		/* strict comparison: ties keep the earliest position */
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product, single-precision complex:
        *z = sum over i of conj(x_i) * y_i,   i = 0 .. n-1.
    The result is returned through z; n <= 0 stores zero.
    A negative increment walks its vector backwards starting from element
    (1-n)*inc (the BLAS convention) instead of indexing before the array. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous fast path */
 		for (i=0;i<n;i++) {
 			acc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		integer kx = incx < 0 ? (1 - n) * incx : 0;
 		integer ky = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			acc += conjf(Cf(&x[kx + i*incx])) * Cf(&y[ky + i*incy]);
 		}
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product, double-precision complex:
        *z = sum over i of conj(x_i) * y_i,   i = 0 .. n-1.
    The result is returned through z; n <= 0 stores zero.
    A negative increment walks its vector backwards starting from element
    (1-n)*inc (the BLAS convention) instead of indexing before the array. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous fast path */
 		for (i=0;i<n;i++) {
 			acc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		integer kx = incx < 0 ? (1 - n) * incx : 0;
 		integer ky = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			acc += conj(Cd(&x[kx + i*incx])) * Cd(&y[ky + i*incy]);
 		}
 	}
 	pCd(z) = acc;
 }
 /* Unconjugated dot product, single-precision complex:
        *z = sum over i of x_i * y_i,   i = 0 .. n-1.
    The result is returned through z; n <= 0 stores zero.
    A negative increment walks its vector backwards starting from element
    (1-n)*inc (the BLAS convention) instead of indexing before the array. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous fast path */
 		for (i=0;i<n;i++) {
 			acc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		integer kx = incx < 0 ? (1 - n) * incx : 0;
 		integer ky = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			acc += Cf(&x[kx + i*incx]) * Cf(&y[ky + i*incy]);
 		}
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product, double-precision complex:
        *z = sum over i of x_i * y_i,   i = 0 .. n-1.
    The result is returned through z; n <= 0 stores zero.
    A negative increment walks its vector backwards starting from element
    (1-n)*inc (the BLAS convention) instead of indexing before the array. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous fast path */
 		for (i=0;i<n;i++) {
 			acc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		integer kx = incx < 0 ? (1 - n) * incx : 0;
 		integer ky = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			acc += Cd(&x[kx + i*incx]) * Cd(&y[ky + i*incy]);
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */
 /* Arguments are passed by address throughout, so literal constants are
    materialized as file-scope objects: */

 /* c__1: first argument of every ilaenv_ call in sgels_ */
 static integer c__1 = 1;
 /* c_n1: passed to ilaenv_ for the trailing dimension arguments */
 static integer c_n1 = -1;
 /* c_b33: both scalar arguments of slaset_ (zero-fills B) */
 static real c_b33 = 0.f;
 /* c__0: second and third arguments of every slascl_ call */
 static integer c__0 = 0;

 /* > \brief <b> SGELS solves overdetermined or underdetermined systems for GE matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGELS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgels.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgels.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgels.f"> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, */
 /*                         INFO ) */

 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS */
 /*       REAL               A( LDA, * ), B( LDB, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGELS solves overdetermined or underdetermined real linear systems */
 /* > involving an M-by-N matrix A, or its transpose, using a QR or LQ */
 /* > factorization of A.  It is assumed that A has full rank. */
 /* > */
 /* > The following options are provided: */
 /* > */
 /* > 1. If TRANS = 'N' and m >= n:  find the least squares solution of */
 /* >    an overdetermined system, i.e., solve the least squares problem */
 /* >                 minimize || B - A*X ||. */
 /* > */
 /* > 2. If TRANS = 'N' and m < n:  find the minimum norm solution of */
 /* >    an underdetermined system A * X = B. */
 /* > */
 /* > 3. If TRANS = 'T' and m >= n:  find the minimum norm solution of */
 /* >    an underdetermined system A**T * X = B. */
 /* > */
 /* > 4. If TRANS = 'T' and m < n:  find the least squares solution of */
 /* >    an overdetermined system, i.e., solve the least squares problem */
 /* >                 minimize || B - A**T * X ||. */
 /* > */
 /* > Several right hand side vectors b and solution vectors x can be */
 /* > handled in a single call; they are stored as the columns of the */
 /* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
 /* > matrix X. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          = 'N': the linear system involves A; */
 /* >          = 'T': the linear system involves A**T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of */
 /* >          columns of the matrices B and X. NRHS >=0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, */
 /* >            if M >= N, A is overwritten by details of its QR */
 /* >                       factorization as returned by SGEQRF; */
 /* >            if M <  N, A is overwritten by details of its LQ */
 /* >                       factorization as returned by SGELQF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= MAX(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,NRHS) */
 /* >          On entry, the matrix B of right hand side vectors, stored */
 /* >          columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */
 /* >          if TRANS = 'T'. */
 /* >          On exit, if INFO = 0, B is overwritten by the solution */
 /* >          vectors, stored columnwise: */
 /* >          if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */
 /* >          squares solution vectors; the residual sum of squares for the */
 /* >          solution in each column is given by the sum of squares of */
 /* >          elements N+1 to M in that column; */
 /* >          if TRANS = 'N' and m < n, rows 1 to N of B contain the */
 /* >          minimum norm solution vectors; */
 /* >          if TRANS = 'T' and m >= n, rows 1 to M of B contain the */
 /* >          minimum norm solution vectors; */
 /* >          if TRANS = 'T' and m < n, rows 1 to M of B contain the */
 /* >          least squares solution vectors; the residual sum of squares */
 /* >          for the solution in each column is given by the sum of */
 /* >          squares of elements M+1 to N in that column. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= MAX(1,M,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          LWORK >= MAX( 1, MN + MAX( MN, NRHS ) ). */
 /* >          For optimal performance, */
 /* >          LWORK >= MAX( 1, MN + MAX( MN, NRHS )*NB ), */
 /* >          where MN = MIN(M,N) and NB is the optimum block size. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO =  i, the i-th diagonal element of the */
 /* >                triangular factor of A is zero, so that A does not have */
 /* >                full rank; the least squares solution could not be */
 /* >                computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEsolve */

 /*  ===================================================================== */
 /* SGELS driver (f2c translation).

    Flow of this routine:
      1. validate the arguments and record the first offending one in *info;
      2. query ilaenv_ for block sizes and store the resulting workspace
         size in work[1];
      3. return early for a workspace query, an empty problem, or an
         all-zero A;
      4. rescale A and B when their largest entry lies outside
         [smlnum, bignum];
      5. factor A with sgeqrf_ (m >= n) or sgelqf_ (m < n), then combine
         sormqr_/sormlq_ with strtrs_ to overwrite B with the solution;
      6. undo the scaling on the solution rows and restore work[1].

    Every exit path returns 0; the outcome is reported through *info. */
 /* Subroutine */ int sgels_(char *trans, integer *m, integer *n, integer *
 	nrhs, real *a, integer *lda, real *b, integer *ldb, real *work, 
 	integer *lwork, integer *info)
 {
     /* System generated locals */
     integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;

     /* Local variables */
     real anrm, bnrm;
     integer brow;
     logical tpsd;
     integer i__, j, iascl, ibscl;
     extern logical lsame_(char *, char *);
     integer wsize;
     real rwork[1];
     integer nb;
     extern /* Subroutine */ int slabad_(real *, real *);
     integer mn;
     extern real slamch_(char *), slange_(char *, integer *, integer *,
 	     real *, integer *, real *);
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
     extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
     integer scllen;
     real bignum;
     extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer 
 	    *, real *, real *, integer *, integer *), slascl_(char *, integer 
 	    *, integer *, real *, real *, integer *, integer *, real *, 
 	    integer *, integer *), sgeqrf_(integer *, integer *, real 
 	    *, integer *, real *, real *, integer *, integer *), slaset_(char 
 	    *, integer *, integer *, real *, real *, real *, integer *);
     real smlnum;
     extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, 
 	    integer *, real *, integer *, real *, real *, integer *, real *, 
 	    integer *, integer *);
     logical lquery;
     extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
 	    integer *, real *, integer *, real *, real *, integer *, real *, 
 	    integer *, integer *), strtrs_(char *, char *, 
 	    char *, integer *, integer *, real *, integer *, real *, integer *
 	    , integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input arguments. */

     /* Parameter adjustments */
     /* Shift the base pointers so the 1-based, column-major subscripts used */
     /* below (a[i + j * a_dim1], b[i + j * b_dim1], work[1]) address the */
     /* caller's first element. */
     a_dim1 = *lda;
     a_offset = 1 + a_dim1 * 1;
     a -= a_offset;
     b_dim1 = *ldb;
     b_offset = 1 + b_dim1 * 1;
     b -= b_offset;
     --work;

     /* Function Body */
     *info = 0;
     mn = f2cmin(*m,*n);
     /* lwork == -1 requests a workspace-size query only */
     lquery = *lwork == -1;
     /* *info receives minus the position of the first invalid argument */
     if (! (lsame_(trans, "N") || lsame_(trans, "T"))) {
 	*info = -1;
     } else if (*m < 0) {
 	*info = -2;
     } else if (*n < 0) {
 	*info = -3;
     } else if (*nrhs < 0) {
 	*info = -4;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -6;
     } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = f2cmax(1,*m);
 	if (*ldb < f2cmax(i__1,*n)) {
 	    *info = -8;
 	} else /* if(complicated condition) */ {
 /* Computing MAX */
 	    i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs);
 	    /* a too-small lwork is an error unless this is a query */
 	    if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
 		*info = -10;
 	    }
 	}
     }

 /*     Figure out optimal block size */

     /* Also entered when lwork is the only invalid argument (-10), so */
     /* work[1] is set to the computed size before the error return below. */
     /* tpsd, nb and wsize are assigned only inside this block; the code */
     /* that reads them later is reached only when *info == 0. */
     if (*info == 0 || *info == -10) {

 	tpsd = TRUE_;
 	if (lsame_(trans, "N")) {
 	    tpsd = FALSE_;
 	}

 	/* nb = larger of the factorization block size and the block size */
 	/* of the matching orthogonal-multiply routine */
 	if (*m >= *n) {
 	    nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
 		    (ftnlen)1);
 	    if (tpsd) {
 /* Computing MAX */
 		i__1 = nb, i__2 = ilaenv_(&c__1, "SORMQR", "LN", m, nrhs, n, &
 			c_n1, (ftnlen)6, (ftnlen)2);
 		nb = f2cmax(i__1,i__2);
 	    } else {
 /* Computing MAX */
 		i__1 = nb, i__2 = ilaenv_(&c__1, "SORMQR", "LT", m, nrhs, n, &
 			c_n1, (ftnlen)6, (ftnlen)2);
 		nb = f2cmax(i__1,i__2);
 	    }
 	} else {
 	    nb = ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
 		    (ftnlen)1);
 	    if (tpsd) {
 /* Computing MAX */
 		i__1 = nb, i__2 = ilaenv_(&c__1, "SORMLQ", "LT", n, nrhs, m, &
 			c_n1, (ftnlen)6, (ftnlen)2);
 		nb = f2cmax(i__1,i__2);
 	    } else {
 /* Computing MAX */
 		i__1 = nb, i__2 = ilaenv_(&c__1, "SORMLQ", "LN", n, nrhs, m, &
 			c_n1, (ftnlen)6, (ftnlen)2);
 		nb = f2cmax(i__1,i__2);
 	    }
 	}

 /* Computing MAX */
 	i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs) * nb;
 	wsize = f2cmax(i__1,i__2);
 	work[1] = (real) wsize;

     }

     if (*info != 0) {
 	/* xerbla_ receives the positive argument position */
 	i__1 = -(*info);
 	xerbla_("SGELS ", &i__1, (ftnlen)6);
 	return 0;
     } else if (lquery) {
 	return 0;
     }

 /*     Quick return if possible */

 /* Computing MIN */
     i__1 = f2cmin(*m,*n);
     if (f2cmin(i__1,*nrhs) == 0) {
 	/* empty problem: zero the leading max(m,n)-by-nrhs part of B */
 	i__1 = f2cmax(*m,*n);
 	slaset_("Full", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb);
 	return 0;
     }

 /*     Get machine parameters */

     smlnum = slamch_("S") / slamch_("P");
     bignum = 1.f / smlnum;
     slabad_(&smlnum, &bignum);

 /*     Scale A, B if max element outside range [SMLNUM,BIGNUM] */

     /* iascl / ibscl record which rescaling (0 = none, 1 = up, 2 = down) */
     /* was applied, so it can be reversed after the solve. */
     anrm = slange_("M", m, n, &a[a_offset], lda, rwork);
     iascl = 0;
     if (anrm > 0.f && anrm < smlnum) {

 /*        Scale matrix norm up to SMLNUM */

 	slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
 		info);
 	iascl = 1;
     } else if (anrm > bignum) {

 /*        Scale matrix norm down to BIGNUM */

 	slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
 		info);
 	iascl = 2;
     } else if (anrm == 0.f) {

 /*        Matrix all zero. Return zero solution. */

 	i__1 = f2cmax(*m,*n);
 	slaset_("F", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb);
 	/* skip factorization, solve and unscaling */
 	goto L50;
     }

     /* number of rows of B holding the right-hand sides on entry */
     brow = *m;
     if (tpsd) {
 	brow = *n;
     }
     bnrm = slange_("M", &brow, nrhs, &b[b_offset], ldb, rwork);
     ibscl = 0;
     if (bnrm > 0.f && bnrm < smlnum) {

 /*        Scale matrix norm up to SMLNUM */

 	slascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], 
 		ldb, info);
 	ibscl = 1;
     } else if (bnrm > bignum) {

 /*        Scale matrix norm down to BIGNUM */

 	slascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], 
 		ldb, info);
 	ibscl = 2;
     }

     /* Workspace layout for the calls below: work[1..mn] is passed as the */
     /* tau argument and the remaining lwork - mn entries, starting at */
     /* work[mn + 1], as scratch. */
     if (*m >= *n) {

 /*        compute QR factorization of A */

 	i__1 = *lwork - mn;
 	sgeqrf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info)
 		;

 /*        workspace at least N, optimally N*NB */

 	if (! tpsd) {

 /*           Least-Squares Problem min || A * X - B || */

 /*           B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */

 	    i__1 = *lwork - mn;
 	    sormqr_("Left", "Transpose", m, nrhs, n, &a[a_offset], lda, &work[
 		    1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);

 /*           workspace at least NRHS, optimally NRHS*NB */

 /*           B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */

 	    strtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset]
 		    , lda, &b[b_offset], ldb, info);

 	    /* a positive info from strtrs_ is returned to the caller as-is; */
 	    /* this exit bypasses the "Undo scaling" step below */
 	    if (*info > 0) {
 		return 0;
 	    }

 	    scllen = *n;

 	} else {

 /*           Underdetermined system of equations A**T * X = B */

 /*           B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */

 	    strtrs_("Upper", "Transpose", "Non-unit", n, nrhs, &a[a_offset], 
 		    lda, &b[b_offset], ldb, info);

 	    /* same early exit as above: no unscaling is performed */
 	    if (*info > 0) {
 		return 0;
 	    }

 /*           B(N+1:M,1:NRHS) = ZERO */

 	    i__1 = *nrhs;
 	    for (j = 1; j <= i__1; ++j) {
 		i__2 = *m;
 		for (i__ = *n + 1; i__ <= i__2; ++i__) {
 		    b[i__ + j * b_dim1] = 0.f;
 /* L10: */
 		}
 /* L20: */
 	    }

 /*           B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */

 	    i__1 = *lwork - mn;
 	    sormqr_("Left", "No transpose", m, nrhs, n, &a[a_offset], lda, &
 		    work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);

 /*           workspace at least NRHS, optimally NRHS*NB */

 	    scllen = *m;

 	}

     } else {

 /*        Compute LQ factorization of A */

 	i__1 = *lwork - mn;
 	sgelqf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info)
 		;

 /*        workspace at least M, optimally M*NB. */

 	if (! tpsd) {

 /*           underdetermined system of equations A * X = B */

 /*           B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */

 	    strtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset]
 		    , lda, &b[b_offset], ldb, info);

 	    /* same early exit as above: no unscaling is performed */
 	    if (*info > 0) {
 		return 0;
 	    }

 /*           B(M+1:N,1:NRHS) = 0 */

 	    i__1 = *nrhs;
 	    for (j = 1; j <= i__1; ++j) {
 		i__2 = *n;
 		for (i__ = *m + 1; i__ <= i__2; ++i__) {
 		    b[i__ + j * b_dim1] = 0.f;
 /* L30: */
 		}
 /* L40: */
 	    }

 /*           B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) */

 	    i__1 = *lwork - mn;
 	    sormlq_("Left", "Transpose", n, nrhs, m, &a[a_offset], lda, &work[
 		    1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);

 /*           workspace at least NRHS, optimally NRHS*NB */

 	    scllen = *n;

 	} else {

 /*           overdetermined system min || A**T * X - B || */

 /*           B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */

 	    i__1 = *lwork - mn;
 	    sormlq_("Left", "No transpose", n, nrhs, m, &a[a_offset], lda, &
 		    work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);

 /*           workspace at least NRHS, optimally NRHS*NB */

 /*           B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */

 	    strtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], 
 		    lda, &b[b_offset], ldb, info);

 	    /* same early exit as above: no unscaling is performed */
 	    if (*info > 0) {
 		return 0;
 	    }

 	    scllen = *m;

 	}

     }

 /*     Undo scaling */

     /* Only the first scllen rows of B (the solution rows) are rescaled: */
     /* first by the factor applied to A, then by the inverse of the */
     /* factor applied to B. */
     if (iascl == 1) {
 	slascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
     } else if (iascl == 2) {
 	slascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
     }
     if (ibscl == 1) {
 	slascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
     } else if (ibscl == 2) {
 	slascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
     }

 L50:
     /* work[1..] was handed to the factorization and multiply calls above, */
     /* so the workspace size is written into work[1] again before returning */
     work[1] = (real) wsize;

     return 0;

 /*     End of SGELS */

 } /* sgels_ */

--- a/lapack-netlib/SRC/sgelss.c
+++ b/lapack-netlib/SRC/sgelss.c
--- a/lapack-netlib/SRC/sgelsy.c
+++ b/lapack-netlib/SRC/sgelsy.c
@@ -0,0 +1,939 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar type names used throughout the translated code. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers are plain structs with a real part r and imaginary part i. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd: build a C99 complex value from the struct's r and i members. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd: reinterpret the struct pointer as a pointer to the C99 complex
    type so a result can be stored through it.
    NOTE(review): relies on the struct and the _Complex type sharing one
    memory layout -- confirm for every supported compiler. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf(z) / pCd(z): assignable lvalue form of the two casts above. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Values of type logical. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */
 /* Control-block layouts for the I/O statements named in the comment above
    each struct.  None of them is referenced by code visible in this section. */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Most members come in pairs: a char* buffer followed by its length. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* One member per scalar type declared by this header. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of one namelist variable: name, address, dims and type code. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Arithmetic helpers.  These are macros, so an argument may be evaluated
    more than once; do not pass expressions with side effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Intrinsic shims.  Arguments are pointers; macros taking a result
    parameter (R, c, p) store through it with pCf / pCd. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 /* NOTE(review): calls ccos (double-precision) on a float complex value,
    whereas the sibling c_* macros call the f-suffixed functions. */
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 /* NOTE(review): r_cnjg and r_imag use the double-precision conj / cimag on
    a float complex value; the result is narrowed on assignment. */
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* truncation toward zero */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* base-10 logarithm as a constant multiple of the natural logarithm */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* nearest integer, halves rounded away from zero */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* magnitude of __a with the sign of __b (a zero __b counts as non-negative) */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Powers: integer exponents are forwarded to the *pow_ui helpers defined
    further down in this file. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): B is dereferenced directly rather than converted with Cd()
    as A is -- confirm what type callers pass for B. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* Concatenate *(np) strings rpp[i] (lengths rnp[i]) into lpp, truncating at
    llp characters and filling any remainder with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* Compare only the first min(c,d) characters. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* Bounded character copy: moves at most min(C,D) characters from B to A and
    stops early at a NUL in B.  Nothing is padded and no terminator is written. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* Termination shims: both ignore their arguments and end the process
    (exit status 1 for sig_die, 0 for s_stop). */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Version tag string; not referenced by any code visible in this section. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shims: expand to a bare break / continue statement. */
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): the next three expand to a brace-enclosed expression with no
    terminating semicolon, so they do not look usable as an expression or as a
    statement as written -- confirm whether anything expands them. */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning logical; the parameter list is left
    unspecified (variadic when compiled as C++). */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Single-precision x**n for an integer exponent, by square-and-multiply.
    n == 0 yields 1; a negative n yields (1/x)**|n|. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value overflows (undefined behaviour). */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* Square only while exponent bits remain, so the base is never
 		   squared one time too many. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Double-precision x**n for an integer exponent, by square-and-multiply.
    n == 0 yields 1; a negative n yields (1/x)**|n|. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value overflows (undefined behaviour). */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* Square only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Single-precision complex x**n for an integer exponent, by
    square-and-multiply.  n == 0 yields 1; a negative n yields (1/x)**|n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value overflows (undefined behaviour). */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* Square only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Double-precision complex x**n for an integer exponent, by
    square-and-multiply.  n == 0 yields 1; a negative n yields (1/x)**|n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value overflows (undefined behaviour). */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* Square only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Integer power x**n with truncating integer results.
      n == 0, or x == 1       -> 1
      n <  0 and x == -1      -> +1 or -1 according to the parity of n
      n <  0 and |x| > 1      -> 0 (the true value truncates to zero)
      n <  0 and x == 0       -> evaluates 1/x, exactly as before
      n >  0                  -> square-and-multiply */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int bits;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		/* The 1/x for a zero base is kept so a zero base with a negative
 		   exponent still performs the same division by zero. */
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* x == -1: only the parity of n matters.  Testing it directly
 		   avoids negating n, which overflows for the most negative
 		   integer. */
 		return (n % 2 != 0) ? -1 : 1;
 	}
 	bits = (unsigned long int)n;
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* Square only while exponent bits remain. */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Position of the first largest entry among w[s-1] .. w[e-1], reported
    1-based relative to s.  w[s-1] is always read, even if e < s.
    The parameter n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double top = w[s-1];
 	integer k = s + 1;
 	while (k <= e) {
 		/* strict comparison: ties keep the earliest position */
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Position of the first largest entry among w[s-1] .. w[e-1], reported
    1-based relative to s.  w[s-1] is always read, even if e < s.
    The parameter n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float top = w[s-1];
 	integer k = s + 1;
 	while (k <= e) {
 		/* strict comparison: ties keep the earliest position */
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product, single-precision complex:
        *z = sum over i of conj(x_i) * y_i,   i = 0 .. n-1.
    The result is returned through z; n <= 0 stores zero.
    A negative increment walks its vector backwards starting from element
    (1-n)*inc (the BLAS convention) instead of indexing before the array. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous fast path */
 		for (i=0;i<n;i++) {
 			acc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		integer kx = incx < 0 ? (1 - n) * incx : 0;
 		integer ky = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			acc += conjf(Cf(&x[kx + i*incx])) * Cf(&y[ky + i*incy]);
 		}
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product, double-precision complex:
        *z = sum over i of conj(x_i) * y_i,   i = 0 .. n-1.
    The result is returned through z; n <= 0 stores zero.
    A negative increment walks its vector backwards starting from element
    (1-n)*inc (the BLAS convention) instead of indexing before the array. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous fast path */
 		for (i=0;i<n;i++) {
 			acc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		integer kx = incx < 0 ? (1 - n) * incx : 0;
 		integer ky = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			acc += conj(Cd(&x[kx + i*incx])) * Cd(&y[ky + i*incy]);
 		}
 	}
 	pCd(z) = acc;
 }
 /* Unconjugated dot product, single-precision complex:
        *z = sum over i of x_i * y_i,   i = 0 .. n-1.
    The result is returned through z; n <= 0 stores zero.
    A negative increment walks its vector backwards starting from element
    (1-n)*inc (the BLAS convention) instead of indexing before the array. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous fast path */
 		for (i=0;i<n;i++) {
 			acc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		integer kx = incx < 0 ? (1 - n) * incx : 0;
 		integer ky = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			acc += Cf(&x[kx + i*incx]) * Cf(&y[ky + i*incy]);
 		}
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product, double-precision complex:
        *z = sum over i of x_i * y_i,   i = 0 .. n-1.
    The result is returned through z; n <= 0 stores zero.
    A negative increment walks its vector backwards starting from element
    (1-n)*inc (the BLAS convention) instead of indexing before the array. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous fast path */
 		for (i=0;i<n;i++) {
 			acc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		integer kx = incx < 0 ? (1 - n) * incx : 0;
 		integer ky = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			acc += Cd(&x[kx + i*incx]) * Cd(&y[ky + i*incy]);
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */
 /* File-scope objects holding literal constants.  Presumably each is passed
    by address to the routines called from SGELSY; the uses are not visible
    in this section -- TODO confirm against the function body. */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__0 = 0;
 static real c_b31 = 0.f;
 static integer c__2 = 2;
 static real c_b54 = 1.f;

 /* > \brief <b> SGELSY solves overdetermined or underdetermined systems for GE matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGELSY + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgelsy.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgelsy.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgelsy.f"> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, */
 /*                          WORK, LWORK, INFO ) */

 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK */
 /*       REAL               RCOND */
 /*       INTEGER            JPVT( * ) */
 /*       REAL               A( LDA, * ), B( LDB, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGELSY computes the minimum-norm solution to a real linear least */
 /* > squares problem: */
 /* >     minimize || A * X - B || */
 /* > using a complete orthogonal factorization of A.  A is an M-by-N */
 /* > matrix which may be rank-deficient. */
 /* > */
 /* > Several right hand side vectors b and solution vectors x can be */
 /* > handled in a single call; they are stored as the columns of the */
 /* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
 /* > matrix X. */
 /* > */
 /* > The routine first computes a QR factorization with column pivoting: */
 /* >     A * P = Q * [ R11 R12 ] */
 /* >                 [  0  R22 ] */
 /* > with R11 defined as the largest leading submatrix whose estimated */
 /* > condition number is less than 1/RCOND.  The order of R11, RANK, */
 /* > is the effective rank of A. */
 /* > */
 /* > Then, R22 is considered to be negligible, and R12 is annihilated */
 /* > by orthogonal transformations from the right, arriving at the */
 /* > complete orthogonal factorization: */
 /* >    A * P = Q * [ T11 0 ] * Z */
 /* >                [  0  0 ] */
 /* > The minimum-norm solution is then */
 /* >    X = P * Z**T [ inv(T11)*Q1**T*B ] */
 /* >                 [        0         ] */
 /* > where Q1 consists of the first RANK columns of Q. */
 /* > */
 /* > This routine is basically identical to the original xGELSX except */
 /* > three differences: */
 /* >   o The call to the subroutine xGEQPF has been substituted by the */
 /* >     call to the subroutine xGEQP3. This subroutine is a Blas-3 */
 /* >     version of the QR factorization with column pivoting. */
 /* >   o Matrix B (the right hand side) is updated with Blas-3. */
 /* >   o The permutation of matrix B (the right hand side) is faster and */
 /* >     simpler. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of */
 /* >          columns of matrices B and X. NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, A has been overwritten by details of its */
 /* >          complete orthogonal factorization. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,NRHS) */
 /* >          On entry, the M-by-NRHS right hand side matrix B. */
 /* >          On exit, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= max(1,M,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] JPVT */
 /* > \verbatim */
 /* >          JPVT is INTEGER array, dimension (N) */
 /* >          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted */
 /* >          to the front of AP, otherwise column i is a free column. */
 /* >          On exit, if JPVT(i) = k, then the i-th column of AP */
 /* >          was the k-th column of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] RCOND */
 /* > \verbatim */
 /* >          RCOND is REAL */
 /* >          RCOND is used to determine the effective rank of A, which */
 /* >          is defined as the order of the largest leading triangular */
 /* >          submatrix R11 in the QR factorization with pivoting of A, */
 /* >          whose estimated condition number < 1/RCOND. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RANK */
 /* > \verbatim */
 /* >          RANK is INTEGER */
 /* >          The effective rank of A, i.e., the order of the submatrix */
 /* >          R11.  This is the same as the order of the submatrix T11 */
 /* >          in the complete orthogonal factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          The unblocked strategy requires that: */
 /* >             LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ), */
 /* >          where MN = min( M, N ). */
 /* >          The block algorithm requires that: */
 /* >             LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ), */
 /* >          where NB is an upper bound on the blocksize returned */
 /* >          by ILAENV for the routines SGEQP3, STZRZF, STZRQF, SORMQR, */
 /* >          and SORMRZ. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: If INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEsolve */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA \n */
 /* >    E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n */
 /* >    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n */
 /* > */
 /*  ===================================================================== */
 /* sgelsy_: least-squares driver built from the calls visible below.
    Sequence: argument checks and workspace sizing (ilaenv_), optional scaling
    of A and B (slange_/slascl_), pivoted QR (sgeqp3_), rank determination with
    slaic1_ against *rcond, stzrzf_ when *rank < *n, then sormqr_, strsm_,
    sormrz_, the row permutation through jpvt, and finally undoing the scaling.
    All arguments are pointers; arrays are indexed 1-based after the pointer
    shifts under "Parameter adjustments".
    Outputs written here: *rank, *info, work[1] (set to lwkopt), and B.
    The function always returns 0; errors are reported through *info and
    xerbla_. */
 /* Subroutine */ int sgelsy_(integer *m, integer *n, integer *nrhs, real *a, 
 	integer *lda, real *b, integer *ldb, integer *jpvt, real *rcond, 
 	integer *rank, real *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
    real r__1, r__2;

    /* Local variables */
    real anrm, bnrm, smin, smax;
    integer i__, j, iascl, ibscl, ismin, ismax;
    real c1, c2;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
 	    integer *);
    real wsize, s1, s2;
    extern /* Subroutine */ int strsm_(char *, char *, char *, char *, 
 	    integer *, integer *, real *, real *, integer *, real *, integer *
 	    ), slaic1_(integer *, integer *, 
 	    real *, real *, real *, real *, real *, real *, real *), sgeqp3_(
 	    integer *, integer *, real *, integer *, integer *, real *, real *
 	    , integer *, integer *);
    integer nb;
    extern /* Subroutine */ int slabad_(real *, real *);
    integer mn;
    extern real slamch_(char *), slange_(char *, integer *, integer *,
 	     real *, integer *, real *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    real bignum;
    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
 	    real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, 
 	    real *, integer *);
    integer lwkmin, nb1, nb2, nb3, nb4;
    real sminpr, smaxpr, smlnum;
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
 	    integer *, real *, integer *, real *, real *, integer *, real *, 
 	    integer *, integer *), sormrz_(char *, char *, 
 	    integer *, integer *, integer *, integer *, real *, integer *, 
 	    real *, real *, integer *, real *, integer *, integer *), stzrzf_(integer *, integer *, real *, integer *, real *, 
 	    real *, integer *, integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


    /* Parameter adjustments */
    /* Shift the base pointers so that a[i + j*a_dim1], b[i + j*b_dim1], */
    /* jpvt[i] and work[i] can be addressed with 1-based indices. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --jpvt;
    --work;

    /* Function Body */
    mn = f2cmin(*m,*n);
    /* work[ismin...] and work[ismax...] are the two vectors handed to slaic1_ */
    ismin = mn + 1;
    ismax = (mn << 1) + 1;

 /*     Test the input arguments. */

    *info = 0;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -5;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = f2cmax(1,*m);
 	if (*ldb < f2cmax(i__1,*n)) {
 	    *info = -7;
 	}
    }

 /*     Figure out optimal block size */

    if (*info == 0) {
 	if (mn == 0 || *nrhs == 0) {
 	    lwkmin = 1;
 	    lwkopt = 1;
 	} else {
 	    nb1 = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6,
 		     (ftnlen)1);
 	    nb2 = ilaenv_(&c__1, "SGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6,
 		     (ftnlen)1);
 	    nb3 = ilaenv_(&c__1, "SORMQR", " ", m, n, nrhs, &c_n1, (ftnlen)6, 
 		    (ftnlen)1);
 	    nb4 = ilaenv_(&c__1, "SORMRQ", " ", m, n, nrhs, &c_n1, (ftnlen)6, 
 		    (ftnlen)1);
 /* Computing MAX */
 	    i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3);
 	    nb = f2cmax(i__1,nb4);
 /* Computing MAX */
 /*          lwkmin = mn + max(2*mn, n+1, mn+nrhs) */
 	    i__1 = mn << 1, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = mn + 
 		    *nrhs;
 	    lwkmin = mn + f2cmax(i__1,i__2);
 /* Computing MAX */
 /*          lwkopt = max(lwkmin, mn+2*n+nb*(n+1), 2*mn+nb*nrhs) */
 	    i__1 = lwkmin, i__2 = mn + (*n << 1) + nb * (*n + 1), i__1 = f2cmax(
 		    i__1,i__2), i__2 = (mn << 1) + nb * *nrhs;
 	    lwkopt = f2cmax(i__1,i__2);
 	}
 	work[1] = (real) lwkopt;

 	if (*lwork < lwkmin && ! lquery) {
 	    *info = -12;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("SGELSY", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 /*        Workspace query: work[1] already holds lwkopt. */
 	return 0;
    }

 /*     Quick return if possible */

    if (mn == 0 || *nrhs == 0) {
 	*rank = 0;
 	return 0;
    }

 /*     Get machine parameters */

    smlnum = slamch_("S") / slamch_("P");
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);

 /*     Scale A, B if max entries outside range [SMLNUM,BIGNUM] */

    anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]);
    iascl = 0;
    if (anrm > 0.f && anrm < smlnum) {

 /*        Scale matrix norm up to SMLNUM */

 	slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
 		info);
 	iascl = 1;
    } else if (anrm > bignum) {

 /*        Scale matrix norm down to BIGNUM */

 	slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
 		info);
 	iascl = 2;
    } else if (anrm == 0.f) {

 /*        Matrix all zero. Return zero solution. */

 	i__1 = f2cmax(*m,*n);
 	slaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb);
 	*rank = 0;
 	goto L70;
    }

    bnrm = slange_("M", m, nrhs, &b[b_offset], ldb, &work[1]);
    ibscl = 0;
    if (bnrm > 0.f && bnrm < smlnum) {

 /*        Scale matrix norm up to SMLNUM */

 	slascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb,
 		 info);
 	ibscl = 1;
    } else if (bnrm > bignum) {

 /*        Scale matrix norm down to BIGNUM */

 	slascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb,
 		 info);
 	ibscl = 2;
    }

 /*     Compute QR factorization with column pivoting of A: */
 /*        A * P = Q * R */

    i__1 = *lwork - mn;
    sgeqp3_(m, n, &a[a_offset], lda, &jpvt[1], &work[1], &work[mn + 1], &i__1,
 	     info);
 /*     wsize only feeds its own update further down; nothing else in this */
 /*     function reads it. */
    wsize = mn + work[mn + 1];

 /*     workspace: MN+2*N+NB*(N+1). */
 /*     Details of Householder rotations stored in WORK(1:MN). */

 /*     Determine RANK using incremental condition estimation */

    work[ismin] = 1.f;
    work[ismax] = 1.f;
    smax = (r__1 = a[a_dim1 + 1], abs(r__1));
    smin = smax;
    if ((r__1 = a[a_dim1 + 1], abs(r__1)) == 0.f) {
 /*        A(1,1) is zero after the factorization: zero B and leave with */
 /*        rank 0. */
 	*rank = 0;
 	i__1 = f2cmax(*m,*n);
 	slaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb);
 	goto L70;
    } else {
 	*rank = 1;
    }

 /*     Grow *rank one column at a time while smaxpr * rcond <= sminpr. */
 L10:
    if (*rank < mn) {
 	i__ = *rank + 1;
 	slaic1_(&c__2, rank, &work[ismin], &smin, &a[i__ * a_dim1 + 1], &a[
 		i__ + i__ * a_dim1], &sminpr, &s1, &c1);
 	slaic1_(&c__1, rank, &work[ismax], &smax, &a[i__ * a_dim1 + 1], &a[
 		i__ + i__ * a_dim1], &smaxpr, &s2, &c2);

 	if (smaxpr * *rcond <= sminpr) {
 /*           Accept column rank+1: rescale both vectors by s1/s2 and */
 /*           append c1/c2 as their new last entries. */
 	    i__1 = *rank;
 	    for (i__ = 1; i__ <= i__1; ++i__) {
 		work[ismin + i__ - 1] = s1 * work[ismin + i__ - 1];
 		work[ismax + i__ - 1] = s2 * work[ismax + i__ - 1];
 /* L20: */
 	    }
 	    work[ismin + *rank] = c1;
 	    work[ismax + *rank] = c2;
 	    smin = sminpr;
 	    smax = smaxpr;
 	    ++(*rank);
 	    goto L10;
 	}
    }

 /*     workspace: 3*MN. */

 /*     Logically partition R = [ R11 R12 ] */
 /*                             [  0  R22 ] */
 /*     where R11 = R(1:RANK,1:RANK) */

 /*     [R11,R12] = [ T11, 0 ] * Y */

    if (*rank < *n) {
 	i__1 = *lwork - (mn << 1);
 	stzrzf_(rank, n, &a[a_offset], lda, &work[mn + 1], &work[(mn << 1) + 
 		1], &i__1, info);
    }

 /*     workspace: 2*MN. */
 /*     Details of Householder rotations stored in WORK(MN+1:2*MN) */

 /*     B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */

    i__1 = *lwork - (mn << 1);
    sormqr_("Left", "Transpose", m, nrhs, &mn, &a[a_offset], lda, &work[1], &
 	    b[b_offset], ldb, &work[(mn << 1) + 1], &i__1, info);
 /* Computing MAX */
    r__1 = wsize, r__2 = (mn << 1) + work[(mn << 1) + 1];
    wsize = f2cmax(r__1,r__2);

 /*     workspace: 2*MN+NB*NRHS. */

 /*     B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) */

    strsm_("Left", "Upper", "No transpose", "Non-unit", rank, nrhs, &c_b54, &
 	    a[a_offset], lda, &b[b_offset], ldb);

 /*     Zero rows RANK+1..N of every column of B. */
    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
 	i__2 = *n;
 	for (i__ = *rank + 1; i__ <= i__2; ++i__) {
 	    b[i__ + j * b_dim1] = 0.f;
 /* L30: */
 	}
 /* L40: */
    }

 /*     B(1:N,1:NRHS) := Y**T * B(1:N,1:NRHS) */

    if (*rank < *n) {
 	i__1 = *n - *rank;
 	i__2 = *lwork - (mn << 1);
 	sormrz_("Left", "Transpose", n, nrhs, rank, &i__1, &a[a_offset], lda, 
 		&work[mn + 1], &b[b_offset], ldb, &work[(mn << 1) + 1], &i__2,
 		 info);
    }

 /*     workspace: 2*MN+NRHS. */

 /*     B(1:N,1:NRHS) := P * B(1:N,1:NRHS) */

 /*     Each column is scattered into work[1..n] at the positions given by */
 /*     jpvt, then copied back over the column with scopy_. */
    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    work[jpvt[i__]] = b[i__ + j * b_dim1];
 /* L50: */
 	}
 	scopy_(n, &work[1], &c__1, &b[j * b_dim1 + 1], &c__1);
 /* L60: */
    }

 /*     workspace: N. */

 /*     Undo scaling */

    if (iascl == 1) {
 	slascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb,
 		 info);
 	slascl_("U", &c__0, &c__0, &smlnum, &anrm, rank, rank, &a[a_offset], 
 		lda, info);
    } else if (iascl == 2) {
 	slascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb,
 		 info);
 	slascl_("U", &c__0, &c__0, &bignum, &anrm, rank, rank, &a[a_offset], 
 		lda, info);
    }
    if (ibscl == 1) {
 	slascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb,
 		 info);
    } else if (ibscl == 2) {
 	slascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb,
 		 info);
    }

 /*     Common exit, also reached by the two early zero-solution paths. */
 /*     work[1] was used as scratch above, so lwkopt is stored again. */
 L70:
    work[1] = (real) lwkopt;

    return 0;

 /*     End of SGELSY */

 } /* sgelsy_ */

--- a/lapack-netlib/SRC/sgemlq.c
+++ b/lapack-netlib/SRC/sgemlq.c
@@ -0,0 +1,681 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar type names used by the translated Fortran code in this file. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers as a pair of reals: r = real part, i = imaginary part. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd build a C99 complex VALUE from the struct's two members. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd reinterpret the struct pointer as a pointer to a C99 complex, */
 /* so pCf(z)/pCd(z) can be used as assignment targets. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer aliases and descriptor structs for Fortran I/O statements. */
 /* The routine translated in this file (sgemlq_) references only ftnlen, */
 /* in its xerbla_ declaration and call; none of the structs is used by it. */
 /* NOTE(review): field meanings are not visible here; they presumably */
 /* follow the libf2c runtime - confirm against its f2c.h. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Each char* result field is followed by a length field of a similar name. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* One storage cell that can hold any of the scalar types declared above. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of one variable: its name, its address, a dims pointer and a */
 /* type code. NOTE(review): the encoding of dims/type is not visible here. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Arithmetic helpers. abs, f2cmin and f2cmax expand their arguments more */
 /* than once, so arguments with side effects must not be passed. */
 /* abs here replaces any function of that name for the rest of the file. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit b of a: test it, clear it, set it (each yields a value; a itself is */
 /* not modified). */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro forms of the libf2c intrinsic helpers. */
 /* Conventions visible in the definitions: arguments are POINTERS (hence */
 /* the *(x) dereferences); the c_ and z_ macros with an R/c/p first */
 /* parameter store their result through it and expand to a { ... } block, */
 /* so those are statements, not expressions. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* positive difference: a - b when a > b, otherwise 0 */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* truncation toward zero built from floor() */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* base-10 logarithm as log(x) times the constant 0.4342944819... */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* round to nearest, halves away from zero; u_nint takes a value, the */
 /* d_/i_ wrappers take a pointer */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* |a| carrying the sign of b (b >= 0 counts as positive) */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* powers: the *_ui helpers they forward to are defined later in this file */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): B is passed to cpow as *(B), unlike A which goes through */
 /* Cd(); if B is a doublecomplex* this would not compile - confirm intent. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* s_cat: concatenate the *np strings rpp[i] (lengths rnp[i]) into lpp, */
 /* copying at most llp characters in total and filling the remainder of */
 /* lpp with blanks. Expands to a block that declares its own locals. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp compares only the first min(c,d) characters of a and b. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy copies at most min(C,D) characters and stops early at a NUL in */
 /* B; it neither pads A nor writes a terminator. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* Both arguments of sig_die and s_stop are ignored: the process exits */
 /* with status 1 or 0 respectively. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shorthands; only meaningful inside a loop body. */
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): the three macros below wrap an expression in braces */
 /* without a semicolon, so they cannot be used as values - confirm that */
 /* nothing relies on them. */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning logical. The parameter list is */
 /* left open: variadic when compiled as C++, unspecified when compiled as C. */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* spow_ui: x raised to the integer power n, by repeated squaring.
  * n == 0 yields 1; a negative n uses the reciprocal 1/x with exponent |n|.
  * |n| is formed in unsigned arithmetic so that the most negative integer
  * is never negated as a signed value (which would overflow). */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;	/* this binary digit of the exponent is set */
 		if (bits >>= 1) x *= x;		/* more digits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* dpow_ui: x raised to the integer power n, by repeated squaring.
  * n == 0 yields 1; a negative n uses the reciprocal 1/x with exponent |n|.
  * |n| is formed in unsigned arithmetic so that the most negative integer
  * is never negated as a signed value (which would overflow). */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;	/* this binary digit of the exponent is set */
 		if (bits >>= 1) x *= x;		/* more digits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* cpow_ui: complex x raised to the integer power n, by repeated squaring.
  * n == 0 yields 1; a negative n uses the reciprocal 1/x with exponent |n|.
  * |n| is formed in unsigned arithmetic so that the most negative integer
  * is never negated as a signed value (which would overflow). */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;	/* this binary digit of the exponent is set */
 		if (bits >>= 1) x *= x;		/* more digits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* zpow_ui: complex x raised to the integer power n, by repeated squaring.
  * n == 0 yields 1; a negative n uses the reciprocal 1/x with exponent |n|.
  * |n| is formed in unsigned arithmetic so that the most negative integer
  * is never negated as a signed value (which would overflow). */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;	/* this binary digit of the exponent is set */
 		if (bits >>= 1) x *= x;		/* more digits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* pow_ii: integer power x**n in integer arithmetic.
  * For n <= 0 the result is decided without looping:
  *   n == 0 or x == 1  -> 1
  *   x == -1           -> continue below with exponent -n
  *   x == 0            -> evaluates 1/x (an integer division by zero)
  *   any other x       -> 0
  * Otherwise the power is accumulated by repeated squaring. */
 static integer pow_ii(integer x, integer n) {
 	integer result;
 	unsigned long int bits;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		n = -n;
 	}
 	bits = n;
 	result = 1;
 	for (;;) {
 		if (bits & 01) result *= x;
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* dmaxloc_: position of the first largest value among w[s-1] .. w[e-1],
  * counted from 1 relative to s (so s itself maps to 1).
  * w[s-1] is always read, even when e < s. The n argument is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* smaxloc_: position of the first largest value among w[s-1] .. w[e-1],
  * counted from 1 relative to s (so s itself maps to 1).
  * w[s-1] is always read, even when e < s. The n argument is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* cdotc_: conjugated complex dot product, *z = sum over i of conj(x_i) * y_i.
  *
  * n_, incx_ and incy_ are read through their pointers; the sum is stored
  * through z (it is 0 when n <= 0 because no loop iteration runs).
  * A negative increment traverses its vector backwards starting at element
  * (1-n)*inc, the reference BLAS convention, so no index drops below 0. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + conjg(x(i))*y(i) */
 			acc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		/* first element visited in each vector */
 		integer kx = incx < 0 ? (1 - n) * incx : 0;
 		integer ky = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			acc += conjf(Cf(&x[kx])) * Cf(&y[ky]);
 			kx += incx;
 			ky += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* zdotc_: conjugated complex dot product, *z = sum over i of conj(x_i) * y_i.
  *
  * n_, incx_ and incy_ are read through their pointers; the sum is stored
  * through z (it is 0 when n <= 0 because no loop iteration runs).
  * A negative increment traverses its vector backwards starting at element
  * (1-n)*inc, the reference BLAS convention, so no index drops below 0. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + dconjg(x(i))*y(i) */
 			acc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		/* first element visited in each vector */
 		integer kx = incx < 0 ? (1 - n) * incx : 0;
 		integer ky = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			acc += conj(Cd(&x[kx])) * Cd(&y[ky]);
 			kx += incx;
 			ky += incy;
 		}
 	}
 	pCd(z) = acc;
 }
 /* cdotu_: unconjugated complex dot product, *z = sum over i of x_i * y_i.
  *
  * n_, incx_ and incy_ are read through their pointers; the sum is stored
  * through z (it is 0 when n <= 0 because no loop iteration runs).
  * A negative increment traverses its vector backwards starting at element
  * (1-n)*inc, the reference BLAS convention, so no index drops below 0. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + x(i)*y(i), no conjugation */
 			acc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		/* first element visited in each vector */
 		integer kx = incx < 0 ? (1 - n) * incx : 0;
 		integer ky = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			acc += Cf(&x[kx]) * Cf(&y[ky]);
 			kx += incx;
 			ky += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* zdotu_: unconjugated complex dot product, *z = sum over i of x_i * y_i.
  *
  * n_, incx_ and incy_ are read through their pointers; the sum is stored
  * through z (it is 0 when n <= 0 because no loop iteration runs).
  * A negative increment traverses its vector backwards starting at element
  * (1-n)*inc, the reference BLAS convention, so no index drops below 0. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + x(i)*y(i), no conjugation */
 			acc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		/* first element visited in each vector */
 		integer kx = incx < 0 ? (1 - n) * incx : 0;
 		integer ky = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			acc += Cd(&x[kx]) * Cd(&y[ky]);
 			kx += incx;
 			ky += incy;
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SGEMLQ */

 /*  Definition: */
 /*  =========== */

 /*      SUBROUTINE SGEMLQ( SIDE, TRANS, M, N, K, A, LDA, T, */
 /*     $                   TSIZE, C, LDC, WORK, LWORK, INFO ) */


 /*      CHARACTER          SIDE, TRANS */
 /*      INTEGER            INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC */
 /*      REAL               A( LDA, * ), T( * ), C(LDC, * ), WORK( * ) */

 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* >     SGEMLQ overwrites the general real M-by-N matrix C with */
 /* > */
 /* >                    SIDE = 'L'     SIDE = 'R' */
 /* >    TRANS = 'N':      Q * C          C * Q */
 /* >    TRANS = 'T':      Q**T * C       C * Q**T */
 /* >    where Q is a real orthogonal matrix defined as the product */
 /* >    of blocked elementary reflectors computed by short wide LQ */
 /* >    factorization (SGELQ) */
 /* > */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] SIDE */
 /* > \verbatim */
 /* >          SIDE is CHARACTER*1 */
 /* >          = 'L': apply Q or Q**T from the Left; */
 /* >          = 'R': apply Q or Q**T from the Right. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          = 'N':  No transpose, apply Q; */
 /* >          = 'T':  Transpose, apply Q**T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix C.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix C. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] K */
 /* > \verbatim */
 /* >          K is INTEGER */
 /* >          The number of elementary reflectors whose product defines */
 /* >          the matrix Q. */
 /* >          If SIDE = 'L', M >= K >= 0; */
 /* >          if SIDE = 'R', N >= K >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension */
 /* >                               (LDA,M) if SIDE = 'L', */
 /* >                               (LDA,N) if SIDE = 'R' */
 /* >          Part of the data structure to represent Q as returned by SGELQ. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= max(1,K). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] T */
 /* > \verbatim */
 /* >          T is REAL array, dimension (MAX(5,TSIZE)). */
 /* >          Part of the data structure to represent Q as returned by SGELQ. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TSIZE */
 /* > \verbatim */
 /* >          TSIZE is INTEGER */
 /* >          The dimension of the array T. TSIZE >= 5. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] C */
 /* > \verbatim */
 /* >          C is REAL array, dimension (LDC,N) */
 /* >          On entry, the M-by-N matrix C. */
 /* >          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDC */
 /* > \verbatim */
 /* >          LDC is INTEGER */
 /* >          The leading dimension of the array C. LDC >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >         (workspace) REAL array, dimension (MAX(1,LWORK)) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If LWORK = -1, then a workspace query is assumed. The routine */
 /* >          only calculates the size of the WORK array, returns this */
 /* >          value as WORK(1), and no error message related to WORK */
 /* >          is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \par Further Details */
 /*  ==================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* > These details are particular for this LAPACK implementation. Users should not */
 /* > take them for granted. These details may change in the future, and are not likely */
 /* > true for another LAPACK implementation. These details are relevant if one wants */
 /* > to try to understand the code. They are not part of the interface. */
 /* > */
 /* > In this version, */
 /* > */
 /* >          T(2): row block size (MB) */
 /* >          T(3): column block size (NB) */
 /* >          T(6:TSIZE): data structure needed for Q, computed by */
 /* >                           SLASWLQ or SGELQT */
 /* > */
 /* >  Depending on the matrix dimensions M and N, and row and column */
 /* >  block sizes MB and NB returned by ILAENV, SGELQ will use either */
 /* >  SLASWLQ (if the matrix is wide-and-short) or SGELQT to compute */
 /* >  the LQ factorization. */
 /* >  This version of SGEMLQ will use either SLAMSWLQ or SGEMLQT to */
 /* >  multiply matrix Q by another matrix. */
 /* >  Further Details in SLAMSWLQ or SGEMLQT. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* sgemlq_: validate the arguments, then hand the multiplication of C by Q
    or Q**T to sgemlqt_ or slamswlq_.

    side/trans are compared with lsame_ against "L"/"R" and "N"/"T".
    The block sizes are read from the T array: mb = t(2), nb = t(3).
    The required workspace is lwmin = max(1, lw), with lw = n*mb for
    side = 'L' and lw = m*mb otherwise. lwmin is what *lwork is checked
    against and what is stored in work(1), so the size returned by a
    workspace query (*lwork == -1) is always accepted by a following call.

    On an invalid argument *info is set to minus its position and xerbla_
    is called; work(1) is not written in that case. The function itself
    always returns 0. */
 /* Subroutine */ int sgemlq_(char *side, char *trans, integer *m, integer *n, 
 	integer *k, real *a, integer *lda, real *t, integer *tsize, real *c__,
 	 integer *ldc, real *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, c_dim1, c_offset, i__1;

    /* Local variables */
    logical left, tran;
    extern /* Subroutine */ int slamswlq_(char *, char *, integer *, integer *
 	    , integer *, integer *, integer *, real *, integer *, real *, 
 	    integer *, real *, integer *, real *, integer *, integer *);
    extern logical lsame_(char *, char *);
    logical right;
    integer mb, nb, mn, lw, lwmin;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    logical notran, lquery;
    extern /* Subroutine */ int sgemlqt_(char *, char *, integer *, integer *,
 	     integer *, integer *, real *, integer *, real *, integer *, real 
 	    *, integer *, real *, integer *);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /* ===================================================================== */


 /*     Test the input arguments */

    /* Parameter adjustments */
    /* Shift the base pointers so the arrays can be indexed from 1. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --t;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    lquery = *lwork == -1;
    notran = lsame_(trans, "N");
    tran = lsame_(trans, "T");
    left = lsame_(side, "L");
    right = lsame_(side, "R");

    mb = (integer) t[2];
    nb = (integer) t[3];
    if (left) {
 	lw = *n * mb;
 	mn = *m;
    } else {
 	lw = *m * mb;
 	mn = *n;
    }
 /*     Smallest workspace the LWORK test below accepts. */
    lwmin = f2cmax(1,lw);

    *info = 0;
    if (! left && ! right) {
 	*info = -1;
    } else if (! tran && ! notran) {
 	*info = -2;
    } else if (*m < 0) {
 	*info = -3;
    } else if (*n < 0) {
 	*info = -4;
    } else if (*k < 0 || *k > mn) {
 	*info = -5;
    } else if (*lda < f2cmax(1,*k)) {
 	*info = -7;
    } else if (*tsize < 5) {
 	*info = -9;
    } else if (*ldc < f2cmax(1,*m)) {
 	*info = -11;
    } else if (*lwork < lwmin && ! lquery) {
 	*info = -13;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("SGEMLQ", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Arguments are valid: report the workspace size. */

    work[1] = (real) lwmin;
    if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

 /* Computing MIN */
    i__1 = f2cmin(*m,*n);
    if (f2cmin(i__1,*k) == 0) {
 	return 0;
    }

 /*     Use sgemlqt_ when the relevant dimension of C does not exceed K, or */
 /*     when nb <= K, or when nb >= max(M,N,K); otherwise use slamswlq_. */

 /* Computing MAX */
    i__1 = f2cmax(*m,*n);
    if ((left && *m <= *k) || (right && *n <= *k) || nb <= *k || nb >= 
 	    f2cmax(i__1,*k)) {
 	sgemlqt_(side, trans, m, n, k, &mb, &a[a_offset], lda, &t[6], &mb, &
 		c__[c_offset], ldc, &work[1], info);
    } else {
 	slamswlq_(side, trans, m, n, k, &mb, &nb, &a[a_offset], lda, &t[6], &
 		mb, &c__[c_offset], ldc, &work[1], lwork, info);
    }

 /*     work was handed to the callee as scratch; store the size again. */

    work[1] = (real) lwmin;

    return 0;

 /*     End of SGEMLQ */

 } /* sgemlq_ */

--- a/lapack-netlib/SRC/sgemlqt.c
+++ b/lapack-netlib/SRC/sgemlqt.c
@@ -0,0 +1,686 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /*
  * spow_ui - raise a float to an integer power by repeated squaring.
  *
  *   x  base
  *   n  exponent; n == 0 gives 1, a negative n gives (1/x) raised to |n|
  *
  * Returns the product accumulated in single precision.  |n| is formed in
  * unsigned arithmetic so the most negative exponent cannot overflow when
  * its sign is dropped.
  */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int u;
 	if (n == 0)
 		return result;
 	if (n < 0) {
 		x = 1/x;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current bit set: fold this power in */
 		u >>= 1;
 		if (u == 0) break;
 		x *= x;				/* next bit is worth the squared base */
 	}
 	return result;
 }
 /*
  * dpow_ui - raise a double to an integer power by repeated squaring.
  *
  *   x  base
  *   n  exponent; n == 0 gives 1, a negative n gives (1/x) raised to |n|
  *
  * Returns the product accumulated in double precision.  |n| is formed in
  * unsigned arithmetic so the most negative exponent cannot overflow when
  * its sign is dropped.
  */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int u;
 	if (n == 0)
 		return result;
 	if (n < 0) {
 		x = 1/x;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current bit set: fold this power in */
 		u >>= 1;
 		if (u == 0) break;
 		x *= x;				/* next bit is worth the squared base */
 	}
 	return result;
 }
 /*
  * cpow_ui - raise a single-precision complex value to an integer power by
  * repeated squaring.
  *
  *   x  base
  *   n  exponent; n == 0 gives 1, a negative n gives (1/x) raised to |n|
  *
  * |n| is formed in unsigned arithmetic so the most negative exponent
  * cannot overflow when its sign is dropped.
  */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int u;
 	if (n == 0)
 		return result;
 	if (n < 0) {
 		x = 1/x;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current bit set: fold this power in */
 		u >>= 1;
 		if (u == 0) break;
 		x *= x;				/* next bit is worth the squared base */
 	}
 	return result;
 }
 /*
  * zpow_ui - raise a double-precision complex value to an integer power by
  * repeated squaring.
  *
  *   x  base
  *   n  exponent; n == 0 gives 1, a negative n gives (1/x) raised to |n|
  *
  * |n| is formed in unsigned arithmetic so the most negative exponent
  * cannot overflow when its sign is dropped.
  */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int u;
 	if (n == 0)
 		return result;
 	if (n < 0) {
 		x = 1/x;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current bit set: fold this power in */
 		u >>= 1;
 		if (u == 0) break;
 		x *= x;				/* next bit is worth the squared base */
 	}
 	return result;
 }
 /*
  * pow_ii - integer power x**n in integer arithmetic.
  *
  * Non-positive exponents are settled first:
  *   n == 0 or x == 1     -> 1
  *   x == 0 with n < 0    -> evaluates 1/x, i.e. an integer division by zero
  *   |x| > 1 with n < 0   -> 0
  *   x == -1 with n < 0   -> same as the positive exponent -n
  * Everything else is computed by repeated squaring.
  */
 static integer pow_ii(integer x, integer n) {
 	integer acc;
 	unsigned long int bits;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		n = -n;
 	}
 	bits = n;
 	acc = 1;
 	for (;;) {
 		if (bits & 01) acc *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;
 	}
 	return acc;
 }
 /*
  * dmaxloc_ - locate the largest entry of w among the 1-based positions
  * s..e.  The result is counted from s (1 means w[s-1]); when several
  * entries tie, the earliest one wins.  The argument n is not read.
  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double top = w[s-1];
 	integer pos;
 	for (pos = s + 1; pos <= e; ++pos) {
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 	}
 	return best - s + 1;
 }
 /*
  * smaxloc_ - locate the largest entry of w among the 1-based positions
  * s..e.  The result is counted from s (1 means w[s-1]); when several
  * entries tie, the earliest one wins.  The argument n is not read.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float top = w[s-1];
 	integer pos;
 	for (pos = s + 1; pos <= e; ++pos) {
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 	}
 	return best - s + 1;
 }
 /*
  * cdotc_ - conjugated dot product of two single-precision complex vectors:
  * *z = sum over i of conj(x(i)) * y(i).
  *
  *   n_            number of elements; n <= 0 stores 0
  *   incx_, incy_  strides in elements
  *
  * A negative stride walks its vector backwards from offset (n-1)*|inc|,
  * as in the reference BLAS, rather than indexing before the array base.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* offset of the first element visited in each vector */
 	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
 	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /*
  * zdotc_ - conjugated dot product of two double-precision complex vectors:
  * *z = sum over i of conj(x(i)) * y(i).
  *
  *   n_            number of elements; n <= 0 stores 0
  *   incx_, incy_  strides in elements
  *
  * A negative stride walks its vector backwards from offset (n-1)*|inc|,
  * as in the reference BLAS, rather than indexing before the array base.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* offset of the first element visited in each vector */
 	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
 	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 /*
  * cdotu_ - unconjugated dot product of two single-precision complex
  * vectors: *z = sum over i of x(i) * y(i).
  *
  *   n_            number of elements; n <= 0 stores 0
  *   incx_, incy_  strides in elements
  *
  * A negative stride walks its vector backwards from offset (n-1)*|inc|,
  * as in the reference BLAS, rather than indexing before the array base.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* offset of the first element visited in each vector */
 	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
 	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);	/* no conjugation in the "u" variant */
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /*
  * zdotu_ - unconjugated dot product of two double-precision complex
  * vectors: *z = sum over i of x(i) * y(i).
  *
  *   n_            number of elements; n <= 0 stores 0
  *   incx_, incy_  strides in elements
  *
  * A negative stride walks its vector backwards from offset (n-1)*|inc|,
  * as in the reference BLAS, rather than indexing before the array base.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* offset of the first element visited in each vector */
 	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
 	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);	/* no conjugation in the "u" variant */
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SGEMLQT */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, */
 /*                          C, LDC, WORK, INFO ) */

 /*       CHARACTER SIDE, TRANS */
 /*       INTEGER   INFO, K, LDV, LDC, M, N, MB, LDT */
 /*       REAL      V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEMLQT overwrites the general real M-by-N matrix C with */
 /* > */
 /* >                 SIDE = 'L'     SIDE = 'R' */
 /* > TRANS = 'N':      Q C            C Q */
 /* > TRANS = 'T':   Q**T C            C Q**T */
 /* > */
 /* > where Q is a real orthogonal matrix defined as the product of K */
 /* > elementary reflectors: */
 /* > */
 /* >       Q = H(1) H(2) . . . H(K) = I - V T V**T */
 /* > */
 /* > generated using the compact WY representation as returned by SGELQT. */
 /* > */
 /* > Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] SIDE */
 /* > \verbatim */
 /* >          SIDE is CHARACTER*1 */
 /* >          = 'L': apply Q or Q**T from the Left; */
 /* >          = 'R': apply Q or Q**T from the Right. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          = 'N':  No transpose, apply Q; */
 /* >          = 'T':  Transpose, apply Q**T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix C. M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix C. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] K */
 /* > \verbatim */
 /* >          K is INTEGER */
 /* >          The number of elementary reflectors whose product defines */
 /* >          the matrix Q. */
 /* >          If SIDE = 'L', M >= K >= 0; */
 /* >          if SIDE = 'R', N >= K >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] MB */
 /* > \verbatim */
 /* >          MB is INTEGER */
 /* >          The block size used for the storage of T.  K >= MB >= 1. */
 /* >          This must be the same value of MB used to generate T */
 /* >          in SGELQT. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] V */
 /* > \verbatim */
 /* >          V is REAL array, dimension */
 /* >                               (LDV,M) if SIDE = 'L', */
 /* >                               (LDV,N) if SIDE = 'R' */
 /* >          The i-th row must contain the vector which defines the */
 /* >          elementary reflector H(i), for i = 1,2,...,k, as returned by */
 /* >          SGELQT in the first K rows of its array argument A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDV */
 /* > \verbatim */
 /* >          LDV is INTEGER */
 /* >          The leading dimension of the array V. LDV >= MAX(1,K). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] T */
 /* > \verbatim */
 /* >          T is REAL array, dimension (LDT,K) */
 /* >          The upper triangular factors of the block reflectors */
 /* >          as returned by SGELQT, stored as a MB-by-K matrix. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= MB. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] C */
 /* > \verbatim */
 /* >          C is REAL array, dimension (LDC,N) */
 /* >          On entry, the M-by-N matrix C. */
 /* >          On exit, C is overwritten by Q C, Q**T C, C Q**T or C Q. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDC */
 /* > \verbatim */
 /* >          LDC is INTEGER */
 /* >          The leading dimension of the array C. LDC >= MAX(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array. The dimension of */
 /* >          WORK is N*MB if SIDE = 'L', or  M*MB if SIDE = 'R'. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2017 */

 /* > \ingroup realGEcomputational */

 /*  ===================================================================== */
 /*
  * sgemlqt_ - overwrite the M-by-N matrix C with Q*C, Q**T*C, C*Q or C*Q**T,
  * where Q is held as K row reflectors in V together with the block factors
  * T (MB reflectors per block).  One slarfb_ call is issued per block.
  *
  *   SIDE   'L' applies from the left, 'R' from the right.
  *   TRANS  'N' applies Q, 'T' applies Q**T.
  *   WORK   scratch space handed to slarfb_ with leading dimension
  *          MAX(1,N) for SIDE = 'L' and MAX(1,M) for SIDE = 'R'.
  *   INFO   0 on success; -i when the i-th argument is rejected, in which
  *          case xerbla_ is called and C is left untouched.
  *
  * K may not exceed the order of Q (M for SIDE = 'L', N for SIDE = 'R');
  * such a K is reported as INFO = -5 rather than being forwarded to
  * slarfb_, where it would address storage outside V and C.
  *
  * The function value is always 0.
  */
 /* Subroutine */ int sgemlqt_(char *side, char *trans, integer *m, integer *n,
 	 integer *k, integer *mb, real *v, integer *ldv, real *t, integer *
 	ldt, real *c__, integer *ldc, real *work, integer *info)
 {
     /* Array strides and base offsets for 1-based, column-major access */
     integer v_dim1, v_offset, c_dim1, c_offset, t_dim1, t_offset;
     /* Local variables */
     logical left, right, tran, notran, forward;
     integer i__, ib, blk, nblk, rest, errarg;
     integer q = 0;		/* order of Q; assigned once SIDE is recognised */
     integer ldwork = 1;	/* leading dimension of WORK as seen by slarfb_ */
     extern logical lsame_(char *, char *);
     extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, real *, integer *, real *, 
 	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen);
     /* Shift the array bases so the Fortran-style subscripts below apply. */
     v_dim1 = *ldv;
     v_offset = 1 + v_dim1 * 1;
     v -= v_offset;
     t_dim1 = *ldt;
     t_offset = 1 + t_dim1 * 1;
     t -= t_offset;
     c_dim1 = *ldc;
     c_offset = 1 + c_dim1 * 1;
     c__ -= c_offset;
     /* Decode the option characters. */
     *info = 0;
     left = lsame_(side, "L");
     right = lsame_(side, "R");
     tran = lsame_(trans, "T");
     notran = lsame_(trans, "N");
     if (left) {
 	ldwork = f2cmax(1,*n);
 	q = *m;
     } else if (right) {
 	ldwork = f2cmax(1,*m);
 	q = *n;
     }
     /* Argument checks; the first failing argument determines INFO. */
     if (! left && ! right) {
 	*info = -1;
     } else if (! tran && ! notran) {
 	*info = -2;
     } else if (*m < 0) {
 	*info = -3;
     } else if (*n < 0) {
 	*info = -4;
     } else if (*k < 0 || *k > q) {
 	*info = -5;
     } else if (*mb < 1 || (*mb > *k && *k > 0)) {
 	*info = -6;
     } else if (*ldv < f2cmax(1,*k)) {
 	*info = -8;
     } else if (*ldt < *mb) {
 	*info = -10;
     } else if (*ldc < f2cmax(1,*m)) {
 	*info = -12;
     }
     if (*info != 0) {
 	errarg = -(*info);
 	xerbla_("SGEMLQT", &errarg, (ftnlen)7);
 	return 0;
     }
     /* Quick return: nothing to apply. */
     if (*m == 0 || *n == 0 || *k == 0) {
 	return 0;
     }
     /* Blocks start at rows 1, 1+MB, 1+2*MB, ... of V.  They are visited in */
     /* ascending order for (left, 'N') and (right, 'T') and in descending */
     /* order for (left, 'T') and (right, 'N'). */
     forward = left ? notran : tran;
     nblk = (*k - 1) / *mb + 1;
     for (blk = 0; blk < nblk; ++blk) {
 	i__ = (forward ? blk : nblk - 1 - blk) * *mb + 1;
 	/* the last block may hold fewer than MB reflectors */
 	ib = f2cmin(*mb,*k - i__ + 1);
 	if (left) {
 	    /* block i acts on rows i..M of C */
 	    rest = *m - i__ + 1;
 	    slarfb_("L", notran ? "T" : "N", "F", "R", &rest, n, &ib,
 		    &v[i__ + i__ * v_dim1], ldv, &t[i__ * t_dim1 + 1], ldt,
 		    &c__[i__ + c_dim1], ldc, work, &ldwork);
 	} else {
 	    /* block i acts on columns i..N of C */
 	    rest = *n - i__ + 1;
 	    slarfb_("R", notran ? "T" : "N", "F", "R", m, &rest, &ib,
 		    &v[i__ + i__ * v_dim1], ldv, &t[i__ * t_dim1 + 1], ldt,
 		    &c__[i__ * c_dim1 + 1], ldc, work, &ldwork);
 	}
     }
     return 0;
 /*     End of SGEMLQT */
 } /* sgemlqt_ */

--- a/lapack-netlib/SRC/sgemqr.c
+++ b/lapack-netlib/SRC/sgemqr.c
@@ -0,0 +1,683 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /*
  * spow_ui - raise a float to an integer power by repeated squaring.
  *
  *   x  base
  *   n  exponent; n == 0 gives 1, a negative n gives (1/x) raised to |n|
  *
  * Returns the product accumulated in single precision.  |n| is formed in
  * unsigned arithmetic so the most negative exponent cannot overflow when
  * its sign is dropped.
  */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int u;
 	if (n == 0)
 		return result;
 	if (n < 0) {
 		x = 1/x;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current bit set: fold this power in */
 		u >>= 1;
 		if (u == 0) break;
 		x *= x;				/* next bit is worth the squared base */
 	}
 	return result;
 }
 /*
  * dpow_ui - raise a double to an integer power by repeated squaring.
  *
  *   x  base
  *   n  exponent; n == 0 gives 1, a negative n gives (1/x) raised to |n|
  *
  * Returns the product accumulated in double precision.  |n| is formed in
  * unsigned arithmetic so the most negative exponent cannot overflow when
  * its sign is dropped.
  */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int u;
 	if (n == 0)
 		return result;
 	if (n < 0) {
 		x = 1/x;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current bit set: fold this power in */
 		u >>= 1;
 		if (u == 0) break;
 		x *= x;				/* next bit is worth the squared base */
 	}
 	return result;
 }
 /*
  * cpow_ui - raise a single-precision complex value to an integer power by
  * repeated squaring.
  *
  *   x  base
  *   n  exponent; n == 0 gives 1, a negative n gives (1/x) raised to |n|
  *
  * |n| is formed in unsigned arithmetic so the most negative exponent
  * cannot overflow when its sign is dropped.
  */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int u;
 	if (n == 0)
 		return result;
 	if (n < 0) {
 		x = 1/x;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current bit set: fold this power in */
 		u >>= 1;
 		if (u == 0) break;
 		x *= x;				/* next bit is worth the squared base */
 	}
 	return result;
 }
 /*
  * zpow_ui - raise a double-precision complex value to an integer power by
  * repeated squaring.
  *
  *   x  base
  *   n  exponent; n == 0 gives 1, a negative n gives (1/x) raised to |n|
  *
  * |n| is formed in unsigned arithmetic so the most negative exponent
  * cannot overflow when its sign is dropped.
  */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int u;
 	if (n == 0)
 		return result;
 	if (n < 0) {
 		x = 1/x;
 		u = 0UL - (unsigned long int)n;	/* |n| without negating a signed value */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current bit set: fold this power in */
 		u >>= 1;
 		if (u == 0) break;
 		x *= x;				/* next bit is worth the squared base */
 	}
 	return result;
 }
 /*
  * pow_ii - integer power x**n in integer arithmetic.
  *
  * Non-positive exponents are settled first:
  *   n == 0 or x == 1     -> 1
  *   x == 0 with n < 0    -> evaluates 1/x, i.e. an integer division by zero
  *   |x| > 1 with n < 0   -> 0
  *   x == -1 with n < 0   -> same as the positive exponent -n
  * Everything else is computed by repeated squaring.
  */
 static integer pow_ii(integer x, integer n) {
 	integer acc;
 	unsigned long int bits;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		n = -n;
 	}
 	bits = n;
 	acc = 1;
 	for (;;) {
 		if (bits & 01) acc *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;
 	}
 	return acc;
 }
 /*
  * dmaxloc_ - locate the largest entry of w among the 1-based positions
  * s..e.  The result is counted from s (1 means w[s-1]); when several
  * entries tie, the earliest one wins.  The argument n is not read.
  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double top = w[s-1];
 	integer pos;
 	for (pos = s + 1; pos <= e; ++pos) {
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 	}
 	return best - s + 1;
 }
 /*
  * smaxloc_ - locate the largest entry of w among the 1-based positions
  * s..e.  The result is counted from s (1 means w[s-1]); when several
  * entries tie, the earliest one wins.  The argument n is not read.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float top = w[s-1];
 	integer pos;
 	for (pos = s + 1; pos <= e; ++pos) {
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 	}
 	return best - s + 1;
 }
 /*
  * cdotc_ - conjugated dot product of two single-precision complex vectors:
  * *z = sum over i of conj(x(i)) * y(i).
  *
  *   n_            number of elements; n <= 0 stores 0
  *   incx_, incy_  strides in elements
  *
  * A negative stride walks its vector backwards from offset (n-1)*|inc|,
  * as in the reference BLAS, rather than indexing before the array base.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* offset of the first element visited in each vector */
 	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
 	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /*
  * zdotc_ - conjugated dot product of two double-precision complex vectors:
  * *z = sum over i of conj(x(i)) * y(i).
  *
  *   n_            number of elements; n <= 0 stores 0
  *   incx_, incy_  strides in elements
  *
  * A negative stride walks its vector backwards from offset (n-1)*|inc|,
  * as in the reference BLAS, rather than indexing before the array base.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* offset of the first element visited in each vector */
 	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
 	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 /*
  * cdotu_ - unconjugated dot product of two single-precision complex
  * vectors: *z = sum over i of x(i) * y(i).
  *
  *   n_            number of elements; n <= 0 stores 0
  *   incx_, incy_  strides in elements
  *
  * A negative stride walks its vector backwards from offset (n-1)*|inc|,
  * as in the reference BLAS, rather than indexing before the array base.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* offset of the first element visited in each vector */
 	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
 	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);	/* no conjugation in the "u" variant */
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /*
  * zdotu_ - unconjugated dot product of two double-precision complex
  * vectors: *z = sum over i of x(i) * y(i).
  *
  *   n_            number of elements; n <= 0 stores 0
  *   incx_, incy_  strides in elements
  *
  * A negative stride walks its vector backwards from offset (n-1)*|inc|,
  * as in the reference BLAS, rather than indexing before the array base.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* offset of the first element visited in each vector */
 	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
 	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);	/* no conjugation in the "u" variant */
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SGEMQR */

 /*  Definition: */
 /*  =========== */

 /*      SUBROUTINE SGEMQR( SIDE, TRANS, M, N, K, A, LDA, T, */
 /*     $                   TSIZE, C, LDC, WORK, LWORK, INFO ) */


 /*     CHARACTER         SIDE, TRANS */
 /*     INTEGER           INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC */
 /*     REAL              A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) */

 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEMQR overwrites the general real M-by-N matrix C with */
 /* > */
 /* >                      SIDE = 'L'     SIDE = 'R' */
 /* >     TRANS = 'N':      Q * C          C * Q */
 /* >     TRANS = 'T':      Q**T * C       C * Q**T */
 /* > */
 /* > where Q is a real orthogonal matrix defined as the product */
 /* > of blocked elementary reflectors computed by tall skinny */
 /* > QR factorization (SGEQR) */
 /* > */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] SIDE */
 /* > \verbatim */
 /* >          SIDE is CHARACTER*1 */
 /* >          = 'L': apply Q or Q**T from the Left; */
 /* >          = 'R': apply Q or Q**T from the Right. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          = 'N':  No transpose, apply Q; */
 /* >          = 'T':  Transpose, apply Q**T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix C.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix C. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] K */
 /* > \verbatim */
 /* >          K is INTEGER */
 /* >          The number of elementary reflectors whose product defines */
 /* >          the matrix Q. */
 /* >          If SIDE = 'L', M >= K >= 0; */
 /* >          if SIDE = 'R', N >= K >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,K) */
 /* >          Part of the data structure to represent Q as returned by SGEQR. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. */
 /* >          If SIDE = 'L', LDA >= f2cmax(1,M); */
 /* >          if SIDE = 'R', LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] T */
 /* > \verbatim */
 /* >          T is REAL array, dimension (MAX(5,TSIZE)). */
 /* >          Part of the data structure to represent Q as returned by SGEQR. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TSIZE */
 /* > \verbatim */
 /* >          TSIZE is INTEGER */
 /* >          The dimension of the array T. TSIZE >= 5. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] C */
 /* > \verbatim */
 /* >          C is REAL array, dimension (LDC,N) */
 /* >          On entry, the M-by-N matrix C. */
 /* >          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDC */
 /* > \verbatim */
 /* >          LDC is INTEGER */
 /* >          The leading dimension of the array C. LDC >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >         (workspace) REAL array, dimension (MAX(1,LWORK)) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If LWORK = -1, then a workspace query is assumed. The routine */
 /* >          only calculates the size of the WORK array, returns this */
 /* >          value as WORK(1), and no error message related to WORK */
 /* >          is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \par Further Details */
 /*  ==================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* > These details are particular for this LAPACK implementation. Users should not */
 /* > take them for granted. These details may change in the future, and are not likely */
 /* > true for another LAPACK implementation. These details are relevant if one wants */
 /* > to try to understand the code. They are not part of the interface. */
 /* > */
 /* > In this version, */
 /* > */
 /* >          T(2): row block size (MB) */
 /* >          T(3): column block size (NB) */
 /* >          T(6:TSIZE): data structure needed for Q, computed by */
 /* >                           SLATSQR or SGEQRT */
 /* > */
 /* >  Depending on the matrix dimensions M and N, and row and column */
 /* >  block sizes MB and NB returned by ILAENV, SGEQR will use either */
 /* >  SLATSQR (if the matrix is tall-and-skinny) or SGEQRT to compute */
 /* >  the QR factorization. */
 /* >  This version of SGEMQR will use either SLAMTSQR or SGEMQRT to */
 /* >  multiply matrix Q by another matrix. */
 /* >  Further Details in SLAMTSQR or SGEMQRT. */
 /* > */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Apply Q or Q**T (from SGEQR's data in A and T) to C from the left or right. */
 /* The block sizes MB = T(2) and NB = T(3) select between SGEMQRT and SLAMTSQR. */
 /* On successful exit (and for a workspace query, LWORK = -1) WORK(1) holds the */
 /* minimal LWORK accepted by the argument check, which is never less than 1. */
 /* Subroutine */ int sgemqr_(char *side, char *trans, integer *m, integer *n, 
 	integer *k, real *a, integer *lda, real *t, integer *tsize, real *c__,
 	 integer *ldc, real *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, c_dim1, c_offset, i__1;

    /* Local variables */
    logical left, tran;
    extern /* Subroutine */ int slamtsqr_(char *, char *, integer *, integer *
 	    , integer *, integer *, integer *, real *, integer *, real *, 
 	    integer *, real *, integer *, real *, integer *, integer *);
    extern logical lsame_(char *, char *);
    logical right;
    integer mb, nb, mn, lw, lwmin;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    logical notran, lquery;
    extern /* Subroutine */ int sgemqrt_(char *, char *, integer *, integer *,
 	     integer *, integer *, real *, integer *, real *, integer *, real 
 	    *, integer *, real *, integer *);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /* ===================================================================== */


 /*     Test the input arguments */

    /* Parameter adjustments (shift pointers so Fortran 1-based indices work) */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --t;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    lquery = *lwork == -1;
    notran = lsame_(trans, "N");
    tran = lsame_(trans, "T");
    left = lsame_(side, "L");
    right = lsame_(side, "R");

 /*     Block sizes are stored in T as floating-point values */

    mb = (integer) t[2];
    nb = (integer) t[3];

 /*     LW is the workspace estimate, MN the order of Q */
 /*     NOTE(review): for SIDE = 'R' SGEMQRT documents a workspace of M*NB; */
 /*     confirm that MB*NB is sufficient on that path. */

    if (left) {
 	lw = *n * nb;
 	mn = *m;
    } else {
 	lw = mb * nb;
 	mn = *n;
    }

 /*     The LWORK check below demands at least max(1,LW); report that same */
 /*     value in WORK(1) so that the result of a workspace query is always */
 /*     accepted when passed back as LWORK (LW itself may be 0). */

    lwmin = f2cmax(1,lw);

    *info = 0;
    if (! left && ! right) {
 	*info = -1;
    } else if (! tran && ! notran) {
 	*info = -2;
    } else if (*m < 0) {
 	*info = -3;
    } else if (*n < 0) {
 	*info = -4;
    } else if (*k < 0 || *k > mn) {
 	*info = -5;
    } else if (*lda < f2cmax(1,mn)) {
 	*info = -7;
    } else if (*tsize < 5) {
 	*info = -9;
    } else if (*ldc < f2cmax(1,*m)) {
 	*info = -11;
    } else if (*lwork < lwmin && ! lquery) {
 	*info = -13;
    }

    if (*info == 0) {
 	work[1] = (real) lwmin;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("SGEMQR", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

 /* Computing MIN */
    i__1 = f2cmin(*m,*n);
    if (f2cmin(i__1,*k) == 0) {
 	return 0;
    }

 /*     Use the plain blocked routine unless the row block size MB lies */
 /*     strictly between K and max(M,N,K) and the order of Q exceeds K. */

 /* Computing MAX */
    i__1 = f2cmax(*m,*n);
    if (left && *m <= *k || right && *n <= *k || mb <= *k || mb >= f2cmax(i__1,*
 	    k)) {
 	sgemqrt_(side, trans, m, n, k, &nb, &a[a_offset], lda, &t[6], &nb, &
 		c__[c_offset], ldc, &work[1], info);
    } else {
 	slamtsqr_(side, trans, m, n, k, &mb, &nb, &a[a_offset], lda, &t[6], &
 		nb, &c__[c_offset], ldc, &work[1], lwork, info);
    }

    work[1] = (real) lwmin;

    return 0;

 /*     End of SGEMQR */

 } /* sgemqr_ */

--- a/lapack-netlib/SRC/sgemqrt.c
+++ b/lapack-netlib/SRC/sgemqrt.c
@@ -0,0 +1,706 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Build a C99 complex value from the r (real) and i (imaginary) fields of *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 /* Double-precision counterpart of Cf. */
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* Reinterpret a pointer to the f2c struct as a pointer to a C99 complex by cast. */
 /* NOTE(review): relies on struct { real r, i; } sharing the layout of */
 /* _Complex float -- confirm for the target ABI. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 /* Double-precision counterpart of _pCf. */
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* Dereferenced forms of _pCf/_pCd, usable as the target of an assignment. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Raise x to the integer power n by repeated squaring (single precision). */
 /* A negative n is handled by inverting x and negating n first. */
 static float spow_ui(float x, integer n) {
 	float acc = 1.0;
 	unsigned long int bits;
 	if (n == 0)
 		return acc;
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	for (;;) {
 		/* multiply in the current power of x when its exponent bit is set */
 		if (bits & 01)
 			acc *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return acc;
 }
 /* Raise x to the integer power n by repeated squaring (double precision). */
 /* A negative n is handled by inverting x and negating n first. */
 static double dpow_ui(double x, integer n) {
 	double acc = 1.0;
 	unsigned long int bits;
 	if (n == 0)
 		return acc;
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	for (;;) {
 		/* multiply in the current power of x when its exponent bit is set */
 		if (bits & 01)
 			acc *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return acc;
 }
 /* Raise the complex value x to the integer power n by repeated squaring */
 /* (single precision). A negative n inverts x and negates n first. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float acc = 1.0;
 	unsigned long int bits;
 	if (n == 0)
 		return acc;
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	for (;;) {
 		/* multiply in the current power of x when its exponent bit is set */
 		if (bits & 01)
 			acc *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return acc;
 }
 /* Raise the complex value x to the integer power n by repeated squaring */
 /* (double precision). A negative n inverts x and negates n first. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double acc = 1.0;
 	unsigned long int bits;
 	if (n == 0)
 		return acc;
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	for (;;) {
 		/* multiply in the current power of x when its exponent bit is set */
 		if (bits & 01)
 			acc *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return acc;
 }
 /* Integer power x**n computed by repeated squaring. */
 /* For n <= 0 the result is decided without the loop, except when x == -1. */
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		/* x**0 and 1**n are both 1 */
 		if (n == 0 || x == 1) pow = 1;
 		/* negative exponent, x not in {1,-1}: result is 0, except that */
 		/* x == 0 evaluates 1/x, an integer division by zero */
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		/* x == -1: only the parity of n matters, so use the loop below */
 		else n = -n;
 	}
 	/* The second clause holds only when x == -1 and n is still negative after */
 	/* the negation above -- presumably the most negative integer; confirm. */
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			/* multiply in the current power of x when its exponent bit is set */
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 /* Return the position, counted from 1 at index s, of the first largest */
 /* element among w[s-1] .. w[e-1]. The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer idx;
 	double peak = w[s-1];
 	for (idx = s + 1; idx <= e; ++idx) {
 		/* strict comparison keeps the earliest maximum */
 		if (w[idx-1] > peak) {
 			best = idx;
 			peak = w[idx-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Return the position, counted from 1 at index s, of the first largest */
 /* element among w[s-1] .. w[e-1]. The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer idx;
 	float peak = w[s-1];
 	for (idx = s + 1; idx <= e; ++idx) {
 		/* strict comparison keeps the earliest maximum */
 		if (w[idx-1] > peak) {
 			best = idx;
 			peak = w[idx-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum over i of conjg(x(i)) * y(i), n terms. */
 /* x and y are traversed with strides *incx_ and *incy_. Following the BLAS */
 /* convention, a negative stride starts at element (1-n)*inc so the vector is */
 /* read back to front within the array instead of before its first element. */
 /* *z is set to zero when n <= 0. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex float zdotc = 0.0;
 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
 			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product: *z = sum over i of dconjg(x(i)) * y(i), n terms. */
 /* x and y are traversed with strides *incx_ and *incy_. Following the BLAS */
 /* convention, a negative stride starts at element (1-n)*inc so the vector is */
 /* read back to front within the array instead of before its first element. */
 /* *z is set to zero when n <= 0. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex double zdotc = 0.0;
 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
 			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated dot product: *z = sum over i of x(i) * y(i), n terms. */
 /* x and y are traversed with strides *incx_ and *incy_. Following the BLAS */
 /* convention, a negative stride starts at element (1-n)*inc so the vector is */
 /* read back to front within the array instead of before its first element. */
 /* *z is set to zero when n <= 0. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex float zdotc = 0.0;
 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i), no conjugation */
 			zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Unconjugated dot product: *z = sum over i of x(i) * y(i), n terms. */
 /* x and y are traversed with strides *incx_ and *incy_. Following the BLAS */
 /* convention, a negative stride starts at element (1-n)*inc so the vector is */
 /* read back to front within the array instead of before its first element. */
 /* *z is set to zero when n <= 0. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex double zdotc = 0.0;
 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i), no conjugation */
 			zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SGEMQRT */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEMQRT + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgemqrt
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgemqrt
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgemqrt
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEMQRT( SIDE, TRANS, M, N, K, NB, V, LDV, T, LDT, */
 /*                          C, LDC, WORK, INFO ) */

 /*       CHARACTER SIDE, TRANS */
 /*       INTEGER   INFO, K, LDV, LDC, M, N, NB, LDT */
 /*       REAL   V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEMQRT overwrites the general real M-by-N matrix C with */
 /* > */
 /* >                 SIDE = 'L'     SIDE = 'R' */
 /* > TRANS = 'N':      Q C            C Q */
 /* > TRANS = 'T':   Q**T C            C Q**T */
 /* > */
 /* > where Q is a real orthogonal matrix defined as the product of K */
 /* > elementary reflectors: */
 /* > */
 /* >       Q = H(1) H(2) . . . H(K) = I - V T V**T */
 /* > */
 /* > generated using the compact WY representation as returned by SGEQRT. */
 /* > */
 /* > Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] SIDE */
 /* > \verbatim */
 /* >          SIDE is CHARACTER*1 */
 /* >          = 'L': apply Q or Q**T from the Left; */
 /* >          = 'R': apply Q or Q**T from the Right. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          = 'N':  No transpose, apply Q; */
 /* >          = 'T':  Transpose, apply Q**T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix C. M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix C. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] K */
 /* > \verbatim */
 /* >          K is INTEGER */
 /* >          The number of elementary reflectors whose product defines */
 /* >          the matrix Q. */
 /* >          If SIDE = 'L', M >= K >= 0; */
 /* >          if SIDE = 'R', N >= K >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NB */
 /* > \verbatim */
 /* >          NB is INTEGER */
 /* >          The block size used for the storage of T.  K >= NB >= 1. */
 /* >          This must be the same value of NB used to generate T */
 /* >          in SGEQRT. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] V */
 /* > \verbatim */
 /* >          V is REAL array, dimension (LDV,K) */
 /* >          The i-th column must contain the vector which defines the */
 /* >          elementary reflector H(i), for i = 1,2,...,k, as returned by */
 /* >          SGEQRT in the first K columns of its array argument A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDV */
 /* > \verbatim */
 /* >          LDV is INTEGER */
 /* >          The leading dimension of the array V. */
 /* >          If SIDE = 'L', LDV >= f2cmax(1,M); */
 /* >          if SIDE = 'R', LDV >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] T */
 /* > \verbatim */
 /* >          T is REAL array, dimension (LDT,K) */
 /* >          The upper triangular factors of the block reflectors */
 /* >          as returned by SGEQRT, stored as a NB-by-K matrix. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= NB. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] C */
 /* > \verbatim */
 /* >          C is REAL array, dimension (LDC,N) */
 /* >          On entry, the M-by-N matrix C. */
 /* >          On exit, C is overwritten by Q C, Q**T C, C Q**T or C Q. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDC */
 /* > \verbatim */
 /* >          LDC is INTEGER */
 /* >          The leading dimension of the array C. LDC >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array. The dimension of WORK is */
 /* >           N*NB if SIDE = 'L', or  M*NB if SIDE = 'R'. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int sgemqrt_(char *side, char *trans, integer *m, integer *n,
 	 integer *k, integer *nb, real *v, integer *ldv, real *t, integer *
 	ldt, real *c__, integer *ldc, real *work, integer *info)
 {
    /* Leading dimensions and 1-based index offsets */
    integer v_dim1, v_offset, c_dim1, c_offset, t_dim1, t_offset;

    /* Local variables */
    logical left, right, tran, notran;
    logical on_left, ascending;
    integer q = 0, ldwork = 1;
    integer neg_info, col, step, last, blk, sub_dim;
    char *side_arg, *trans_arg;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, real *, integer *, real *, 
 	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen);


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


    /* Shift the array pointers so that Fortran-style indices can be used */
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1 * 1;
    v -= v_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    --work;

    /* Decode the option characters */
    *info = 0;
    left = lsame_(side, "L");
    right = lsame_(side, "R");
    tran = lsame_(trans, "T");
    notran = lsame_(trans, "N");

    /* q is the order of Q; ldwork is the leading dimension handed to SLARFB */
    if (left) {
 	q = *m;
 	ldwork = f2cmax(1,*n);
    } else if (right) {
 	q = *n;
 	ldwork = f2cmax(1,*m);
    }

    /* Argument checks, reported in argument order */
    if (! left && ! right) {
 	*info = -1;
    } else if (! tran && ! notran) {
 	*info = -2;
    } else if (*m < 0) {
 	*info = -3;
    } else if (*n < 0) {
 	*info = -4;
    } else if (*k < 0 || *k > q) {
 	*info = -5;
    } else if (*nb < 1 || *nb > *k && *k > 0) {
 	*info = -6;
    } else if (*ldv < f2cmax(1,q)) {
 	*info = -8;
    } else if (*ldt < *nb) {
 	*info = -10;
    } else if (*ldc < f2cmax(1,*m)) {
 	*info = -12;
    }

    if (*info != 0) {
 	neg_info = -(*info);
 	xerbla_("SGEMQRT", &neg_info, (ftnlen)7);
 	return 0;
    }

    /* Nothing to do for an empty problem */
    if (*m == 0 || *n == 0 || *k == 0) {
 	return 0;
    }

    /* Pick the SLARFB options and the direction in which the reflector */
    /* blocks are visited: first to last, or last to first. */
    if (left && tran) {
 	side_arg = "L";
 	trans_arg = "T";
 	on_left = 1;
 	ascending = 1;
    } else if (right && notran) {
 	side_arg = "R";
 	trans_arg = "N";
 	on_left = 0;
 	ascending = 1;
    } else if (left && notran) {
 	side_arg = "L";
 	trans_arg = "N";
 	on_left = 1;
 	ascending = 0;
    } else if (right && tran) {
 	side_arg = "R";
 	trans_arg = "T";
 	on_left = 0;
 	ascending = 0;
    } else {
 	return 0;
    }

    last = *k;
    if (ascending) {
 	col = 1;
 	step = *nb;
    } else {
 	/* first column of the final (possibly short) block */
 	col = (*k - 1) / *nb * *nb + 1;
 	step = -(*nb);
    }

    while (col >= 1 && col <= last) {
 	/* width of the current block of reflectors */
 	blk = f2cmin(*nb,*k - col + 1);
 	if (on_left) {
 	    /* rows col..M of C are updated */
 	    sub_dim = *m - col + 1;
 	    slarfb_(side_arg, trans_arg, "F", "C", &sub_dim, n, &blk, 
 		    &v[col + col * v_dim1], ldv, &t[col * t_dim1 + 1], ldt, 
 		    &c__[col + c_dim1], ldc, &work[1], &ldwork);
 	} else {
 	    /* columns col..N of C are updated */
 	    sub_dim = *n - col + 1;
 	    slarfb_(side_arg, trans_arg, "F", "C", m, &sub_dim, &blk, 
 		    &v[col + col * v_dim1], ldv, &t[col * t_dim1 + 1], ldt, 
 		    &c__[col * c_dim1 + 1], ldc, &work[1], &ldwork);
 	}
 	col += step;
    }

    return 0;

 /*     End of SGEMQRT */

 } /* sgemqrt_ */

--- a/lapack-netlib/SRC/sgeql2.c
+++ b/lapack-netlib/SRC/sgeql2.c
@@ -0,0 +1,591 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 /* These three helpers expand to parenthesised expressions so that they can
  * be used wherever a value is expected (assignment, comparison, argument),
  * not only as a brace-enclosed scalar initializer. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Raise x to the integer power n by binary (square-and-multiply)
  * exponentiation.  A negative n is handled by inverting x and using |n|.
  * |n| is formed in unsigned arithmetic so that the most negative integer
  * does not overflow when negated. */
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Raise x to the integer power n by binary (square-and-multiply)
  * exponentiation.  A negative n is handled by inverting x and using |n|.
  * |n| is formed in unsigned arithmetic so that the most negative integer
  * does not overflow when negated. */
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Raise the complex value x to the integer power n by binary
  * (square-and-multiply) exponentiation.  A negative n is handled by
  * inverting x and using |n|.  |n| is formed in unsigned arithmetic so that
  * the most negative integer does not overflow when negated. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Raise the complex value x to the integer power n by binary
  * (square-and-multiply) exponentiation.  A negative n is handled by
  * inverting x and using |n|.  |n| is formed in unsigned arithmetic so that
  * the most negative integer does not overflow when negated. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Integer power x**n.
  *   n == 0 or x == 1      -> 1
  *   n < 0 and |x| > 1     -> 0 (the true result truncates to zero)
  *   n < 0 and x == 0      -> evaluates 1/x, i.e. an integer division by zero
  *                            (NOTE(review): presumably deliberate, to raise
  *                            an arithmetic fault -- confirm)
  *   n < 0 and x == -1     -> (-1)**|n|
  *   n > 0                 -> binary (square-and-multiply) exponentiation
  * |n| is formed in unsigned arithmetic so that the most negative integer
  * does not overflow when negated. */
 static integer pow_ii(integer x, integer n) {
 	integer pow = 1; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for( ; ; ) {
 		if(u & 01) pow *= x;	/* current exponent bit is set */
 		if(u >>= 1) x *= x;	/* more bits remain: square the base */
 		else break;
 	}
 	return pow;
 }
 /* Position of the first largest element of w within the 1-based range
  * s..e, returned relative to s (so s itself yields 1).  The n argument is
  * accepted but not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s, idx;
 	double top = w[s-1];
 	for (idx = s + 1; idx <= e; ++idx) {
 		if (w[idx-1] > top) {
 			best = idx;
 			top = w[idx-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the first largest element of w within the 1-based range
  * s..e, returned relative to s (so s itself yields 1).  The n argument is
  * accepted but not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s, idx;
 	float top = w[s-1];
 	for (idx = s + 1; idx <= e; ++idx) {
 		if (w[idx-1] > top) {
 			best = idx;
 			top = w[idx-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: stores sum(conj(x(k)) * y(k)), k = 1..n, in *z.
  * x and y are stepped by *incx_ and *incy_.  For a negative increment the
  * walk starts at offset (1-n)*inc, as in the reference BLAS, so that every
  * access stays at a non-negative offset from the pointer passed in. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i))* y(i) */
 		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product: stores sum(conj(x(k)) * y(k)), k = 1..n, in *z.
  * x and y are stepped by *incx_ and *incy_.  For a negative increment the
  * walk starts at offset (1-n)*inc, as in the reference BLAS, so that every
  * access stays at a non-negative offset from the pointer passed in. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated dot product: stores sum(x(k) * y(k)), k = 1..n, in *z.
  * x and y are stepped by *incx_ and *incy_.  For a negative increment the
  * walk starts at offset (1-n)*inc, as in the reference BLAS, so that every
  * access stays at a non-negative offset from the pointer passed in. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 		zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = zdotc;
 }
 /* Unconjugated dot product: stores sum(x(k) * y(k)), k = 1..n, in *z.
  * x and y are stepped by *incx_ and *incy_.  For a negative increment the
  * walk starts at offset (1-n)*inc, as in the reference BLAS, so that every
  * access stays at a non-negative offset from the pointer passed in. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 		zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b SGEQL2 computes the QL factorization of a general rectangular matrix using an unblocked algorithm. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEQL2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeql2.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeql2.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeql2.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEQL2( M, N, A, LDA, TAU, WORK, INFO ) */

 /*       INTEGER            INFO, LDA, M, N */
 /*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEQL2 computes a QL factorization of a real m by n matrix A: */
 /* > A = Q * L. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the m by n matrix A. */
 /* >          On exit, if m >= n, the lower triangle of the subarray */
 /* >          A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; */
 /* >          if m <= n, the elements on and below the (n-m)-th */
 /* >          superdiagonal contain the m by n lower trapezoidal matrix L; */
 /* >          the remaining elements, with the array TAU, represent the */
 /* >          orthogonal matrix Q as a product of elementary reflectors */
 /* >          (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is REAL array, dimension (min(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(k) . . . H(2) H(1), where k = min(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in */
 /* >  A(1:m-k+i-1,n-k+i), and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int sgeql2_(integer *m, integer *n, real *a, integer *lda, 
 	real *tau, real *work, integer *info)
 {
     /* External routines */
     extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, 
 	    integer *, real *, real *, integer *, real *);
     extern /* Subroutine */ int slarfg_(integer *, real *, real *, 
 	    integer *, real *);
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);

     /* Locals */
     integer a_dim1, a_offset;
     integer step, k, row, col, len, nrows, ncols, errarg;
     real *diag, *colhead;
     real saved;

 /*     Unblocked QL factorization: reflectors are generated from the last */
 /*     column backwards and applied from the left to the columns before it. */

     /* Shift the pointers so that Fortran-style 1-based subscripts apply */
     a_dim1 = *lda;
     a_offset = 1 + a_dim1 * 1;
     a -= a_offset;
     --tau;
     --work;

     /* Validate M, N and LDA; the first offending argument is reported */
     if (*m < 0) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
     } else {
 	*info = 0;
     }

     if (*info != 0) {
 	errarg = -(*info);
 	xerbla_("SGEQL2", &errarg, (ftnlen)6);
 	return 0;
     }

     k = f2cmin(*m,*n);

     for (step = k; step >= 1; --step) {

 	/* Row and column of the element that H(step) leaves non-zero */
 	row = *m - k + step;
 	col = *n - k + step;
 	diag = &a[row + col * a_dim1];
 	colhead = &a[col * a_dim1 + 1];

 /*        Generate elementary reflector H(step) to annihilate */
 /*        A(1:m-k+step-1,n-k+step) */

 	len = row;
 	slarfg_(&len, diag, colhead, &c__1, &tau[step]);

 /*        Apply H(step) to A(1:m-k+step,1:n-k+step-1) from the left. */
 /*        The pivot is temporarily set to one so that the column itself */
 /*        serves as the reflector vector, and is restored afterwards. */

 	saved = *diag;
 	*diag = 1.f;
 	nrows = row;
 	ncols = col - 1;
 	slarf_("Left", &nrows, &ncols, colhead, &c__1, &tau[step], 
 		&a[a_offset], lda, &work[1]);
 	*diag = saved;
     }
     return 0;

 /*     End of SGEQL2 */

 } /* sgeql2_ */

--- a/lapack-netlib/SRC/sgeqlf.c
+++ b/lapack-netlib/SRC/sgeqlf.c
@@ -0,0 +1,709 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 /* These three helpers expand to parenthesised expressions so that they can
  * be used wherever a value is expected (assignment, comparison, argument),
  * not only as a brace-enclosed scalar initializer. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Raise x to the integer power n by binary (square-and-multiply)
  * exponentiation.  A negative n is handled by inverting x and using |n|.
  * |n| is formed in unsigned arithmetic so that the most negative integer
  * does not overflow when negated. */
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Raise x to the integer power n by binary (square-and-multiply)
  * exponentiation.  A negative n is handled by inverting x and using |n|.
  * |n| is formed in unsigned arithmetic so that the most negative integer
  * does not overflow when negated. */
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Raise the complex value x to the integer power n by binary
  * (square-and-multiply) exponentiation.  A negative n is handled by
  * inverting x and using |n|.  |n| is formed in unsigned arithmetic so that
  * the most negative integer does not overflow when negated. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Raise the complex value x to the integer power n by binary
  * (square-and-multiply) exponentiation.  A negative n is handled by
  * inverting x and using |n|.  |n| is formed in unsigned arithmetic so that
  * the most negative integer does not overflow when negated. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Integer power x**n.
  *   n == 0 or x == 1      -> 1
  *   n < 0 and |x| > 1     -> 0 (the true result truncates to zero)
  *   n < 0 and x == 0      -> evaluates 1/x, i.e. an integer division by zero
  *                            (NOTE(review): presumably deliberate, to raise
  *                            an arithmetic fault -- confirm)
  *   n < 0 and x == -1     -> (-1)**|n|
  *   n > 0                 -> binary (square-and-multiply) exponentiation
  * |n| is formed in unsigned arithmetic so that the most negative integer
  * does not overflow when negated. */
 static integer pow_ii(integer x, integer n) {
 	integer pow = 1; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for( ; ; ) {
 		if(u & 01) pow *= x;	/* current exponent bit is set */
 		if(u >>= 1) x *= x;	/* more bits remain: square the base */
 		else break;
 	}
 	return pow;
 }
 /* Position of the first largest element of w within the 1-based range
  * s..e, returned relative to s (so s itself yields 1).  The n argument is
  * accepted but not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s, idx;
 	double top = w[s-1];
 	for (idx = s + 1; idx <= e; ++idx) {
 		if (w[idx-1] > top) {
 			best = idx;
 			top = w[idx-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the first largest element of w within the 1-based range
  * s..e, returned relative to s (so s itself yields 1).  The n argument is
  * accepted but not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s, idx;
 	float top = w[s-1];
 	for (idx = s + 1; idx <= e; ++idx) {
 		if (w[idx-1] > top) {
 			best = idx;
 			top = w[idx-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: stores sum(conj(x(k)) * y(k)), k = 1..n, in *z.
  * x and y are stepped by *incx_ and *incy_.  For a negative increment the
  * walk starts at offset (1-n)*inc, as in the reference BLAS, so that every
  * access stays at a non-negative offset from the pointer passed in. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i))* y(i) */
 		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product: stores sum(conj(x(k)) * y(k)), k = 1..n, in *z.
  * x and y are stepped by *incx_ and *incy_.  For a negative increment the
  * walk starts at offset (1-n)*inc, as in the reference BLAS, so that every
  * access stays at a non-negative offset from the pointer passed in. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated dot product: stores sum(x(k) * y(k)), k = 1..n, in *z.
  * x and y are stepped by *incx_ and *incy_.  For a negative increment the
  * walk starts at offset (1-n)*inc, as in the reference BLAS, so that every
  * access stays at a non-negative offset from the pointer passed in. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 		zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = zdotc;
 }
 /* Unconjugated dot product: stores sum(x(k) * y(k)), k = 1..n, in *z.
  * x and y are stepped by *incx_ and *incy_.  For a negative increment the
  * walk starts at offset (1-n)*inc, as in the reference BLAS, so that every
  * access stays at a non-negative offset from the pointer passed in. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 		zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;

 /* > \brief \b SGEQLF */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEQLF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqlf.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqlf.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqlf.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */

 /*       INTEGER            INFO, LDA, LWORK, M, N */
 /*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEQLF computes a QL factorization of a real M-by-N matrix A: */
 /* > A = Q * L. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, */
 /* >          if m >= n, the lower triangle of the subarray */
 /* >          A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; */
 /* >          if m <= n, the elements on and below the (n-m)-th */
 /* >          superdiagonal contain the M-by-N lower trapezoidal matrix L; */
 /* >          the remaining elements, with the array TAU, represent the */
 /* >          orthogonal matrix Q as a product of elementary reflectors */
 /* >          (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is REAL array, dimension (min(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK.  LWORK >= max(1,N). */
 /* >          For optimum performance LWORK >= N*NB, where NB is the */
 /* >          optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(k) . . . H(2) H(1), where k = min(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in */
 /* >  A(1:m-k+i-1,n-k+i), and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /*  sgeqlf_ -- blocked QL factorization driver (f2c translation of SGEQLF). */
 /*  Checks M, N, LDA and LWORK, answers a workspace query (LWORK = -1) by */
 /*  storing the optimal size in WORK(1), factors trailing column blocks with */
 /*  sgeql2_ / slarft_ / slarfb_, and finishes the remaining leading part with */
 /*  a single sgeql2_ call.  The function value is always 0; the status is */
 /*  reported through *info (0, or -i when the i-th argument is illegal). */
 /* Subroutine */ int sgeqlf_(integer *m, integer *n, real *a, integer *lda, 
 	real *tau, real *work, integer *lwork, integer *info)
 {
     /* System generated locals */
     integer a_dim1, a_offset, i__1, i__2, i__3, i__4;

     /* Local variables */
     integer i__, k, nbmin, iinfo;
     extern /* Subroutine */ int sgeql2_(integer *, integer *, real *, integer 
 	    *, real *, real *, integer *);
     integer ib, nb, ki, kk, mu, nu, nx;
     extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, real *, integer *, real *, 
 	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen);
     extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
     extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, 
 	    real *, integer *, real *, real *, integer *);
     integer ldwork, lwkopt;
     logical lquery;
     integer iws;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input arguments */

     /* Parameter adjustments */
     /* Rebase the pointers so the Fortran 1-based subscripts can be used */
     /* unchanged: A(i,j) is a[i + j * a_dim1], TAU(i) is tau[i], WORK(i) is */
     /* work[i]. */
     a_dim1 = *lda;
     a_offset = 1 + a_dim1 * 1;
     a -= a_offset;
     --tau;
     --work;

     /* Function Body */
     *info = 0;
     /* LWORK = -1 requests a workspace-size query only. */
     lquery = *lwork == -1;
     if (*m < 0) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
     }

     if (*info == 0) {
 	/* k = number of elementary reflectors; nb is only set when k > 0. */
 	k = f2cmin(*m,*n);
 	if (k == 0) {
 	    lwkopt = 1;
 	} else {
 	    nb = ilaenv_(&c__1, "SGEQLF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
 		    (ftnlen)1);
 	    lwkopt = *n * nb;
 	}
 	/* Publish the optimal workspace size before LWORK is validated, so a */
 	/* query always gets its answer. */
 	work[1] = (real) lwkopt;

 	if (*lwork < f2cmax(1,*n) && ! lquery) {
 	    *info = -7;
 	}
     }

     if (*info != 0) {
 	/* xerbla_ receives the positive index of the offending argument. */
 	i__1 = -(*info);
 	xerbla_("SGEQLF", &i__1, (ftnlen)6);
 	return 0;
     } else if (lquery) {
 	return 0;
     }

 /*     Quick return if possible */

     if (k == 0) {
 	return 0;
     }

     /* Defaults describe the unblocked path: minimum block size 2, */
     /* crossover point 1, workspace of N entries. */
     nbmin = 2;
     nx = 1;
     iws = *n;
     if (nb > 1 && nb < k) {

 /*        Determine when to cross over from blocked to unblocked code. */

 /* Computing MAX */
 	i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQLF", " ", m, n, &c_n1, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);
 	if (nx < k) {

 /*           Determine if workspace is large enough for blocked code. */

 	    /* ldwork is only assigned here; the blocked branch below is */
 	    /* reachable only when this assignment has been executed. */
 	    ldwork = *n;
 	    iws = ldwork * nb;
 	    if (*lwork < iws) {

 /*              Not enough workspace to use optimal NB:  reduce NB and */
 /*              determine the minimum value of NB. */

 		nb = *lwork / ldwork;
 /* Computing MAX */
 		i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQLF", " ", m, n, &c_n1, &
 			c_n1, (ftnlen)6, (ftnlen)1);
 		nbmin = f2cmax(i__1,i__2);
 	    }
 	}
     }

     if (nb >= nbmin && nb < k && nx < k) {

 /*        Use blocked code initially. */
 /*        The last kk columns are handled by the block method. */

 	ki = (k - nx - 1) / nb * nb;
 /* Computing MIN */
 	i__1 = k, i__2 = ki + nb;
 	kk = f2cmin(i__1,i__2);

 	/* Fortran DO loop with stride -nb: i__ runs from k-kk+ki+1 down to */
 	/* k-kk+1.  The sign test on i__2 is the generic f2c loop condition. */
 	i__1 = k - kk + 1;
 	i__2 = -nb;
 	for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ 
 		+= i__2) {
 /* Computing MIN */
 	    i__3 = k - i__ + 1;
 	    ib = f2cmin(i__3,nb);

 /*           Compute the QL factorization of the current block */
 /*           A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) */

 	    i__3 = *m - k + i__ + ib - 1;
 	    sgeql2_(&i__3, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, &tau[
 		    i__], &work[1], &iinfo);
 	    /* Only update when there are columns to the left of the block. */
 	    if (*n - k + i__ > 1) {

 /*              Form the triangular factor of the block reflector */
 /*              H = H(i+ib-1) . . . H(i+1) H(i) */

 		i__3 = *m - k + i__ + ib - 1;
 		slarft_("Backward", "Columnwise", &i__3, &ib, &a[(*n - k + 
 			i__) * a_dim1 + 1], lda, &tau[i__], &work[1], &ldwork);

 /*              Apply H**T to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */

 		/* work[1] holds the triangular factor (leading dimension */
 		/* ldwork); the scratch area handed to slarfb_ starts at */
 		/* work[ib + 1]. */
 		i__3 = *m - k + i__ + ib - 1;
 		i__4 = *n - k + i__ - 1;
 		slarfb_("Left", "Transpose", "Backward", "Columnwise", &i__3, 
 			&i__4, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, &
 			work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], &
 			ldwork);
 	    }
 /* L10: */
 	}
 	/* i__ has already been stepped by -nb past the last block, so */
 	/* i__ + nb - 1 is the last row/column offset not yet factored. */
 	mu = *m - k + i__ + nb - 1;
 	nu = *n - k + i__ + nb - 1;
     } else {
 	/* Unblocked path: the whole matrix is left for sgeql2_. */
 	mu = *m;
 	nu = *n;
     }

 /*     Use unblocked code to factor the last or only block */

     if (mu > 0 && nu > 0) {
 	sgeql2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
     }

     /* Report the workspace size computed above for the path taken. */
     work[1] = (real) iws;
     return 0;

 /*     End of SGEQLF */

 } /* sgeqlf_ */

--- a/lapack-netlib/SRC/sgeqp3.c
+++ b/lapack-netlib/SRC/sgeqp3.c
@@ -0,0 +1,789 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 /* Self-contained replacement for f2c.h / libf2c: scalar typedefs, Fortran */
 /* I/O descriptor structs, intrinsic-function macros and a few static */
 /* helpers, so the translated routine below needs no f2c runtime library. */

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 /* <complex.h> may define `complex` and `I` as macros; remove them so the */
 /* struct typedef named `complex` below (and identifiers named I) compile. */
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Fortran scalar types */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd build a C99 complex value from an f2c complex struct; pCf/pCd give */
 /* an lvalue of C99 complex type that aliases the struct's storage. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Fortran intrinsics as macros.  Arguments may be evaluated more than once. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 /* single-precision variants use the float functions (same values, no */
 /* round trip through double) */
 #define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimagf(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* B points to a doublecomplex struct, so it must be converted with Cd() */
 /* before it can be handed to cpow(). */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 /* These three yield a value, so they expand to parenthesized expressions */
 /* (a brace-wrapped expression cannot be used where a value is expected). */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* spow_ui / dpow_ui / cpow_ui / zpow_ui: x**n for an integer exponent n by */
 /* binary (square-and-multiply) exponentiation.  A negative n inverts x */
 /* first.  |n| is formed in unsigned arithmetic so that the most negative */
 /* integer does not overflow when negated. */
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		u = n;
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		u = n;
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		u = n;
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		u = n;
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) pow *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 /* pow_ii: integer x**n with Fortran integer semantics.  For n < 0 the */
 /* result is 1 when x == 1, +/-1 when x == -1, and 0 for any other non-zero */
 /* x (integer truncation of 1/x**|n|). */
 static integer pow_ii(integer x, integer n) {
 	integer pow = 1; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		/* x == 0 with n < 0 keeps the original deliberate 1/0 */
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* x == -1: use |n|, formed without signed overflow */
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = n;
 	}
 	for( ; ; ) {
 		if(u & 01) pow *= x;
 		if(u >>= 1) x *= x;
 		else break;
 	}
 	return pow;
 }
 /* dmaxloc_ / smaxloc_: position of the first largest element of w(s:e), */
 /* counted from 1 relative to s; s and e are 1-based.  n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float m; integer i, mi;
 	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
 /* cdotc_ / zdotc_ / cdotu_ / zdotu_: complex dot products written to *z; */
 /* the "c" forms conjugate x, the "u" forms do not.  As in the reference */
 /* BLAS, a negative increment walks that vector backwards, starting from */
 /* element (1-n)*inc, so no element before the array is touched. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++, ix+=incx, iy+=incy) { /* zdotc = zdotc + conjg(x(ix)) * y(iy) */
 			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++, ix+=incx, iy+=incy) { /* zdotc = zdotc + dconjg(x(ix)) * y(iy) */
 			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = zdotc;
 }	
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++, ix+=incx, iy+=incy) { /* zdotc = zdotc + x(ix) * y(iy) */
 			zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++, ix+=incx, iy+=incy) { /* zdotc = zdotc + x(ix) * y(iy) */
 			zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* Integer literals that the translated routine passes by address. */
 /* c__1: selector for the ilaenv_ block-size query (NB) and the unit */
 /* stride given to sswap_ / snrm2_. */
 static integer c__1 = 1;
 /* c_n1: the -1 passed for the last two integer arguments of ilaenv_. */
 static integer c_n1 = -1;
 /* c__3: selector for the ilaenv_ crossover-point query (NX). */
 static integer c__3 = 3;
 /* c__2: selector for the ilaenv_ minimum-block-size query (NBMIN). */
 static integer c__2 = 2;

 /* > \brief \b SGEQP3 */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEQP3 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqp3.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqp3.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqp3.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ) */

 /*       INTEGER            INFO, LDA, LWORK, M, N */
 /*       INTEGER            JPVT( * ) */
 /*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEQP3 computes a QR factorization with column pivoting of a */
 /* > matrix A:  A*P = Q*R  using Level 3 BLAS. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A. M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the upper triangle of the array contains the */
 /* >          f2cmin(M,N)-by-N upper trapezoidal matrix R; the elements below */
 /* >          the diagonal, together with the array TAU, represent the */
 /* >          orthogonal matrix Q as a product of f2cmin(M,N) elementary */
 /* >          reflectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] JPVT */
 /* > \verbatim */
 /* >          JPVT is INTEGER array, dimension (N) */
 /* >          On entry, if JPVT(J).ne.0, the J-th column of A is permuted */
 /* >          to the front of A*P (a leading column); if JPVT(J)=0, */
 /* >          the J-th column of A is a free column. */
 /* >          On exit, if JPVT(J)=K, then the J-th column of A*P was */
 /* >          the K-th column of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is REAL array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO=0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. LWORK >= 3*N+1. */
 /* >          For optimal performance LWORK >= 2*N+( N+1 )*NB, where NB */
 /* >          is the optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit. */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real/complex vector */
 /* >  with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in */
 /* >  A(i+1:m,i), and tau in TAU(i). */
 /* > \endverbatim */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */
 /* >    X. Sun, Computer Science Dept., Duke University, USA */
 /* > */
 /*  ===================================================================== */
 /*  sgeqp3_ -- QR factorization with column pivoting (f2c translation of */
 /*  SGEQP3).  Checks M, N, LDA and LWORK, answers a workspace query */
 /*  (LWORK = -1) through WORK(1), moves the columns flagged in JPVT to the */
 /*  front and factors them with sgeqrf_/sormqr_, then factors the remaining */
 /*  free columns with slaqps_ (blocked) and slaqp2_ (unblocked).  The */
 /*  function value is always 0; the status is reported through *info. */
 /* Subroutine */ int sgeqp3_(integer *m, integer *n, real *a, integer *lda, 
 	integer *jpvt, real *tau, real *work, integer *lwork, integer *info)
 {
     /* System generated locals */
     integer a_dim1, a_offset, i__1, i__2, i__3;

     /* Local variables */
     integer nfxd;
     extern real snrm2_(integer *, real *, integer *);
     integer j, nbmin, minmn, minws;
     extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
 	    integer *), slaqp2_(integer *, integer *, integer *, real *, 
 	    integer *, integer *, real *, real *, real *, real *);
     integer jb, na, nb, sm, sn, nx;
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
     extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
     extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
 	    *, real *, real *, integer *, integer *);
     integer topbmn, sminmn;
     extern /* Subroutine */ int slaqps_(integer *, integer *, integer *, 
 	    integer *, integer *, real *, integer *, integer *, real *, real *
 	    , real *, real *, real *, integer *);
     integer lwkopt;
     logical lquery;
     extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
 	    integer *, real *, integer *, real *, real *, integer *, real *, 
 	    integer *, integer *);
     integer fjb, iws;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */

 /*     Test input arguments */
 /*  ==================== */

     /* Parameter adjustments */
     /* Rebase the pointers so the Fortran 1-based subscripts can be used */
     /* unchanged: A(i,j) is a[i + j * a_dim1]; JPVT, TAU and WORK are */
     /* indexed from 1. */
     a_dim1 = *lda;
     a_offset = 1 + a_dim1 * 1;
     a -= a_offset;
     --jpvt;
     --tau;
     --work;

     /* Function Body */
     *info = 0;
     /* LWORK = -1 requests a workspace-size query only. */
     lquery = *lwork == -1;
     if (*m < 0) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
     }

     if (*info == 0) {
 	minmn = f2cmin(*m,*n);
 	if (minmn == 0) {
 	    iws = 1;
 	    lwkopt = 1;
 	} else {
 	    /* iws: minimum workspace 3*N+1; lwkopt: 2*N + (N+1)*NB with NB */
 	    /* taken from the SGEQRF block-size query. */
 	    iws = *n * 3 + 1;
 	    nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
 		    (ftnlen)1);
 	    lwkopt = (*n << 1) + (*n + 1) * nb;
 	}
 	/* Publish the optimal size before LWORK is validated, so a query */
 	/* always gets its answer. */
 	work[1] = (real) lwkopt;

 	if (*lwork < iws && ! lquery) {
 	    *info = -8;
 	}
     }

     if (*info != 0) {
 	/* xerbla_ receives the positive index of the offending argument. */
 	i__1 = -(*info);
 	xerbla_("SGEQP3", &i__1, (ftnlen)6);
 	return 0;
     } else if (lquery) {
 	return 0;
     }

 /*     Move initial columns up front. */

     /* nfxd is the 1-based slot for the next fixed column.  Every column j */
     /* with JPVT(j) != 0 is swapped into that slot and the permutation is */
     /* recorded; all other entries are set to the identity JPVT(j) = j. */
     nfxd = 1;
     i__1 = *n;
     for (j = 1; j <= i__1; ++j) {
 	if (jpvt[j] != 0) {
 	    if (j != nfxd) {
 		sswap_(m, &a[j * a_dim1 + 1], &c__1, &a[nfxd * a_dim1 + 1], &
 			c__1);
 		jpvt[j] = jpvt[nfxd];
 		jpvt[nfxd] = j;
 	    } else {
 		jpvt[j] = j;
 	    }
 	    ++nfxd;
 	} else {
 	    jpvt[j] = j;
 	}
 /* L10: */
     }
     /* Turn "next free slot" into the number of fixed columns. */
     --nfxd;

 /*     Factorize fixed columns */
 /*  ======================= */

 /*     Compute the QR factorization of fixed columns and update */
 /*     remaining columns. */

     if (nfxd > 0) {
 	na = f2cmin(*m,nfxd);
 /* CC      CALL SGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) */
 	sgeqrf_(m, &na, &a[a_offset], lda, &tau[1], &work[1], lwork, info);
 /* Computing MAX */
 	/* Fold the value sgeqrf_ left in WORK(1) into the reported size. */
 	i__1 = iws, i__2 = (integer) work[1];
 	iws = f2cmax(i__1,i__2);
 	if (na < *n) {
 /* CC         CALL SORM2R( 'Left', 'Transpose', M, N-NA, NA, A, LDA, */
 /* CC  $                   TAU, A( 1, NA+1 ), LDA, WORK, INFO ) */
 	    /* Apply the transposed factor of the fixed columns to the */
 	    /* remaining N-NA columns, which start at column NA+1. */
 	    i__1 = *n - na;
 	    sormqr_("Left", "Transpose", m, &i__1, &na, &a[a_offset], lda, &
 		    tau[1], &a[(na + 1) * a_dim1 + 1], lda, &work[1], lwork, 
 		    info);
 /* Computing MAX */
 	    i__1 = iws, i__2 = (integer) work[1];
 	    iws = f2cmax(i__1,i__2);
 	}
     }

 /*     Factorize free columns */
 /*  ====================== */

     if (nfxd < minmn) {

 	/* Dimensions of the trailing submatrix that is still unfactored. */
 	sm = *m - nfxd;
 	sn = *n - nfxd;
 	sminmn = minmn - nfxd;

 /*        Determine the block size. */

 	nb = ilaenv_(&c__1, "SGEQRF", " ", &sm, &sn, &c_n1, &c_n1, (ftnlen)6, 
 		(ftnlen)1);
 	nbmin = 2;
 	nx = 0;

 	if (nb > 1 && nb < sminmn) {

 /*           Determine when to cross over from blocked to unblocked code. */

 /* Computing MAX */
 	    i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", &sm, &sn, &c_n1, &
 		    c_n1, (ftnlen)6, (ftnlen)1);
 	    nx = f2cmax(i__1,i__2);


 	    if (nx < sminmn) {

 /*              Determine if workspace is large enough for blocked code. */

 		minws = (sn << 1) + (sn + 1) * nb;
 		iws = f2cmax(iws,minws);
 		if (*lwork < minws) {

 /*                 Not enough workspace to use optimal NB: Reduce NB and */
 /*                 determine the minimum value of NB. */

 		    nb = (*lwork - (sn << 1)) / (sn + 1);
 /* Computing MAX */
 		    i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", &sm, &sn, &
 			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
 		    nbmin = f2cmax(i__1,i__2);


 		}
 	    }
 	}

 /*        Initialize partial column norms. The first N elements of work */
 /*        store the exact column norms. */

 	/* Both WORK(j) and WORK(N+j) start out as the 2-norm of rows */
 	/* NFXD+1..M of column j. */
 	i__1 = *n;
 	for (j = nfxd + 1; j <= i__1; ++j) {
 	    work[j] = snrm2_(&sm, &a[nfxd + 1 + j * a_dim1], &c__1);
 	    work[*n + j] = work[j];
 /* L20: */
 	}

 	if (nb >= nbmin && nb < sminmn && nx < sminmn) {

 /*           Use blocked code initially. */

 	    j = nfxd + 1;

 /*           Compute factorization: while loop. */


 	    /* Columns beyond topbmn are left to the unblocked code below. */
 	    topbmn = minmn - nx;
 L30:
 	    if (j <= topbmn) {
 /* Computing MIN */
 		i__1 = nb, i__2 = topbmn - j + 1;
 		jb = f2cmin(i__1,i__2);

 /*              Factorize JB columns among columns J:N. */

 		/* WORK(2N+1) and WORK(2N+JB+1) are the two scratch arrays */
 		/* handed to slaqps_.  fjb is written by slaqps_ and is the */
 		/* step by which j advances -- presumably the number of */
 		/* columns it actually factorized; confirm against SLAQPS. */
 		i__1 = *n - j + 1;
 		i__2 = j - 1;
 		i__3 = *n - j + 1;
 		slaqps_(m, &i__1, &i__2, &jb, &fjb, &a[j * a_dim1 + 1], lda, &
 			jpvt[j], &tau[j], &work[j], &work[*n + j], &work[(*n 
 			<< 1) + 1], &work[(*n << 1) + jb + 1], &i__3);

 		j += fjb;
 		goto L30;
 	    }
 	} else {
 	    j = nfxd + 1;
 	}

 /*        Use unblocked code to factor the last or only block. */


 	if (j <= minmn) {
 	    i__1 = *n - j + 1;
 	    i__2 = j - 1;
 	    slaqp2_(m, &i__1, &i__2, &a[j * a_dim1 + 1], lda, &jpvt[j], &tau[
 		    j], &work[j], &work[*n + j], &work[(*n << 1) + 1]);
 	}

     }

     /* Report the largest workspace size recorded along the path taken. */
     work[1] = (real) iws;
     return 0;

 /*     End of SGEQP3 */

 } /* sgeqp3_ */

--- a/lapack-netlib/SRC/sgeqr.c
+++ b/lapack-netlib/SRC/sgeqr.c
@@ -0,0 +1,733 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* R = A**B for doublecomplex operands passed by pointer. Both operands go
    through Cd(): cpow() takes native complex values, not the f2c struct. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Return x raised to the integer power n, computed by binary exponentiation.
    A negative n yields (1/x) raised to |n|; n == 0 yields 1. */
 static float spow_ui(float x, integer n) {
 	float acc = 1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* Take |n| in unsigned arithmetic: -n overflows (undefined
 			   behaviour) when n is the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) acc *= x;
 			if(u >>= 1) x *= x;	/* square only while exponent bits remain */
 			else break;
 		}
 	}
 	return acc;
 }
 /* Return x raised to the integer power n, computed by binary exponentiation.
    A negative n yields (1/x) raised to |n|; n == 0 yields 1. */
 static double dpow_ui(double x, integer n) {
 	double acc = 1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* Take |n| in unsigned arithmetic: -n overflows (undefined
 			   behaviour) when n is the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) acc *= x;
 			if(u >>= 1) x *= x;	/* square only while exponent bits remain */
 			else break;
 		}
 	}
 	return acc;
 }
 /* Return the complex value x raised to the integer power n, computed by
    binary exponentiation. A negative n yields (1/x) raised to |n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float acc = 1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* Take |n| in unsigned arithmetic: -n overflows (undefined
 			   behaviour) when n is the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) acc *= x;
 			if(u >>= 1) x *= x;	/* square only while exponent bits remain */
 			else break;
 		}
 	}
 	return acc;
 }
 /* Return the complex value x raised to the integer power n, computed by
    binary exponentiation. A negative n yields (1/x) raised to |n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double acc = 1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* Take |n| in unsigned arithmetic: -n overflows (undefined
 			   behaviour) when n is the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) acc *= x;
 			if(u >>= 1) x *= x;	/* square only while exponent bits remain */
 			else break;
 		}
 	}
 	return acc;
 }
 /* Integer power x**n with truncating integer semantics for n < 0. */
 static integer pow_ii(integer x, integer n) {
 	integer result;
 	unsigned long int bits;
 	if (n <= 0) {
 		/* x**0 and 1**n are both 1. */
 		if (n == 0 || x == 1)
 			return 1;
 		/* For a negative exponent every base other than +-1 truncates to 0;
 		   a zero base evaluates 1/x, exactly as the original expression did. */
 		if (x != -1)
 			return x == 0 ? 1/x : 0;
 		/* (-1)**n only depends on the parity of |n|. */
 		n = -n;
 	}
 	bits = n;
 	result = 1;
 	for (;;) {
 		if (bits & 01)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return result;
 }
 /* Position (1-based, counted from index s) of the first largest entry among
    the 1-based elements w(s..e). The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position (1-based, counted from index s) of the first largest entry among
    the 1-based elements w(s..e). The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum of conj(x(i)) * y(i) over *n_ elements,
    read from x with stride *incx_ and from y with stride *incy_.
    Stores 0 in *z when *n_ <= 0. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* unit-stride fast path */
 		for (i=0;i<n;i++) {
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else if (n > 0) {
 		/* As in reference BLAS, a negative increment walks the vector
 		   backwards: start at its last logical element rather than
 		   indexing in front of the array. */
 		integer x0 = incx < 0 ? (1 - n) * incx : 0;
 		integer y0 = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotc += conjf(Cf(&x[x0 + i*incx])) * Cf(&y[y0 + i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product: *z = sum of conj(x(i)) * y(i) over *n_ elements,
    read from x with stride *incx_ and from y with stride *incy_.
    Stores 0 in *z when *n_ <= 0. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* unit-stride fast path */
 		for (i=0;i<n;i++) {
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else if (n > 0) {
 		/* As in reference BLAS, a negative increment walks the vector
 		   backwards: start at its last logical element rather than
 		   indexing in front of the array. */
 		integer x0 = incx < 0 ? (1 - n) * incx : 0;
 		integer y0 = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotc += conj(Cd(&x[x0 + i*incx])) * Cd(&y[y0 + i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated dot product: *z = sum of x(i) * y(i) over *n_ elements,
    read from x with stride *incx_ and from y with stride *incy_.
    Stores 0 in *z when *n_ <= 0. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotu = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* unit-stride fast path */
 		for (i=0;i<n;i++) {
 			zdotu += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else if (n > 0) {
 		/* As in reference BLAS, a negative increment walks the vector
 		   backwards: start at its last logical element rather than
 		   indexing in front of the array. */
 		integer x0 = incx < 0 ? (1 - n) * incx : 0;
 		integer y0 = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotu += Cf(&x[x0 + i*incx]) * Cf(&y[y0 + i*incy]);
 		}
 	}
 	pCf(z) = zdotu;
 }
 /* Unconjugated dot product: *z = sum of x(i) * y(i) over *n_ elements,
    read from x with stride *incx_ and from y with stride *incy_.
    Stores 0 in *z when *n_ <= 0. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotu = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* unit-stride fast path */
 		for (i=0;i<n;i++) {
 			zdotu += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else if (n > 0) {
 		/* As in reference BLAS, a negative increment walks the vector
 		   backwards: start at its last logical element rather than
 		   indexing in front of the array. */
 		integer x0 = incx < 0 ? (1 - n) * incx : 0;
 		integer y0 = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotu += Cd(&x[x0 + i*incx]) * Cd(&y[y0 + i*incy]);
 		}
 	}
 	pCd(z) = zdotu;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__2 = 2;

 /* > \brief \b SGEQR */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEQR( M, N, A, LDA, T, TSIZE, WORK, LWORK, */
 /*                         INFO ) */

 /*       INTEGER           INFO, LDA, M, N, TSIZE, LWORK */
 /*       REAL              A( LDA, * ), T( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEQR computes a QR factorization of a real M-by-N matrix A: */
 /* > */
 /* >    A = Q * ( R ), */
 /* >            ( 0 ) */
 /* > */
 /* > where: */
 /* > */
 /* >    Q is a M-by-M orthogonal matrix; */
 /* >    R is an upper-triangular N-by-N matrix; */
 /* >    0 is a (M-N)-by-N zero matrix, if M > N. */
 /* > */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the min(M,N)-by-N upper trapezoidal matrix R */
 /* >          (R is upper triangular if M >= N); */
 /* >          the elements below the diagonal are used to store part of the */
 /* >          data structure to represent Q. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is REAL array, dimension (MAX(5,TSIZE)) */
 /* >          On exit, if INFO = 0, T(1) returns optimal (or either minimal */
 /* >          or optimal, if query is assumed) TSIZE. See TSIZE for details. */
 /* >          Remaining T contains part of the data structure used to represent Q. */
 /* >          If one wants to apply or construct Q, then one needs to keep T */
 /* >          (in addition to A) and pass it to further subroutines. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TSIZE */
 /* > \verbatim */
 /* >          TSIZE is INTEGER */
 /* >          If TSIZE >= 5, the dimension of the array T. */
 /* >          If TSIZE = -1 or -2, then a workspace query is assumed. The routine */
 /* >          only calculates the sizes of the T and WORK arrays, returns these */
 /* >          values as the first entries of the T and WORK arrays, and no error */
 /* >          message related to T or WORK is issued by XERBLA. */
 /* >          If TSIZE = -1, the routine calculates optimal size of T for the */
 /* >          optimum performance and returns this value in T(1). */
 /* >          If TSIZE = -2, the routine calculates minimal size of T and */
 /* >          returns this value in T(1). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          (workspace) REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */
 /* >          or optimal, if query was assumed) LWORK. */
 /* >          See LWORK for details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If LWORK = -1 or -2, then a workspace query is assumed. The routine */
 /* >          only calculates the sizes of the T and WORK arrays, returns these */
 /* >          values as the first entries of the T and WORK arrays, and no error */
 /* >          message related to T or WORK is issued by XERBLA. */
 /* >          If LWORK = -1, the routine calculates optimal size of WORK for the */
 /* >          optimal performance and returns this value in WORK(1). */
 /* >          If LWORK = -2, the routine calculates minimal size of WORK and */
 /* >          returns this value in WORK(1). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \par Further Details */
 /*  ==================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* > The goal of the interface is to give maximum freedom to the developers for */
 /* > creating any QR factorization algorithm they wish. The triangular */
 /* > (trapezoidal) R has to be stored in the upper part of A. The lower part of A */
 /* > and the array T can be used to store any relevant information for applying or */
 /* > constructing the Q factor. The WORK array can safely be discarded after exit. */
 /* > */
 /* > Caution: One should not expect the sizes of T and WORK to be the same from one */
 /* > LAPACK implementation to the other, or even from one execution to the other. */
 /* > A workspace query (for T and WORK) is needed at each execution. However, */
 /* > for a given execution, the size of T and WORK are fixed and will not change */
 /* > from one query to the next. */
 /* > */
 /* > \endverbatim */
 /* > */
 /* > \par Further Details particular to this LAPACK implementation: */
 /*  ============================================================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* > These details are particular for this LAPACK implementation. Users should not */
 /* > take them for granted. These details may change in the future, and are not likely */
 /* > true for another LAPACK implementation. These details are relevant if one wants */
 /* > to try to understand the code. They are not part of the interface. */
 /* > */
 /* > In this version, */
 /* > */
 /* >          T(2): row block size (MB) */
 /* >          T(3): column block size (NB) */
 /* >          T(6:TSIZE): data structure needed for Q, computed by */
 /* >                           SLATSQR or SGEQRT */
 /* > */
 /* >  Depending on the matrix dimensions M and N, and row and column */
 /* >  block sizes MB and NB returned by ILAENV, SGEQR will use either */
 /* >  SLATSQR (if the matrix is tall-and-skinny) or SGEQRT to compute */
 /* >  the QR factorization. */
 /* > */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int sgeqr_(integer *m, integer *n, real *a, integer *lda,
 	real *t, integer *tsize, real *work, integer *lwork, integer *info)
 {
     /* External routines */
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
     extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
 	    integer *, integer *, ftnlen, ftnlen);
     extern /* Subroutine */ int sgeqrt_(integer *, integer *, integer *, real
 	    *, integer *, real *, integer *, real *, integer *);
     extern /* Subroutine */ int slatsqr_(integer *, integer *, integer *,
 	    integer *, real *, integer *, real *, integer *, real *, integer *
 	    , integer *);

     /* Local variables */
     logical is_query, want_min_t, want_min_w, shrunk;
     integer row_blk, col_blk, n_blocks, min_tsize, arg_pos;

 /*     T and WORK are addressed 0-based here: t[0..2] hold T(1..3), */
 /*     t + 5 is T(6) and work[0] is WORK(1). */

     *info = 0;

 /*     Classify the call: any of TSIZE/LWORK equal to -1 or -2 is a query; */
 /*     a -2 anywhere requests minimal sizes for every argument that is not -1. */

     is_query = (*tsize == -1 || *tsize == -2 || *lwork == -1 || *lwork == -2);
     want_min_t = FALSE_;
     want_min_w = FALSE_;
     if (*tsize == -2 || *lwork == -2) {
 	want_min_t = (*tsize != -1);
 	want_min_w = (*lwork != -1);
     }

 /*     Determine the block size */

     if (f2cmin(*m,*n) > 0) {
 	row_blk = ilaenv_(&c__1, "SGEQR ", " ", m, n, &c__1, &c_n1,
 		(ftnlen)6, (ftnlen)1);
 	col_blk = ilaenv_(&c__1, "SGEQR ", " ", m, n, &c__2, &c_n1,
 		(ftnlen)6, (ftnlen)1);
     } else {
 	row_blk = *m;
 	col_blk = 1;
     }
     if (row_blk > *m || row_blk <= *n) {
 	row_blk = *m;
     }
     if (col_blk > f2cmin(*m,*n) || col_blk < 1) {
 	col_blk = 1;
     }
     min_tsize = *n + 5;

 /*     Number of row blocks: ceiling of (M-N)/(MB-N) when that applies. */

     if (row_blk > *n && *m > *n) {
 	integer rows_left = *m - *n;
 	integer step = row_blk - *n;
 	n_blocks = rows_left / step;
 	if (rows_left % step != 0) {
 	    ++n_blocks;
 	}
     } else {
 	n_blocks = 1;
     }

 /*     Determine if the workspace size satisfies minimal size */

     shrunk = FALSE_;
     if ((*tsize < f2cmax(1, col_blk * *n * n_blocks + 5)
 	    || *lwork < col_blk * *n)
 	    && *lwork >= *n && *tsize >= min_tsize && ! is_query) {
 	if (*tsize < f2cmax(1, col_blk * *n * n_blocks + 5)) {
 	    shrunk = TRUE_;
 	    col_blk = 1;
 	    row_blk = *m;
 	}
 	if (*lwork < col_blk * *n) {
 	    shrunk = TRUE_;
 	    col_blk = 1;
 	}
     }

 /*     Test the input arguments */

     if (*m < 0) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
     } else if (! is_query && ! shrunk) {
 	if (*tsize < f2cmax(1, col_blk * *n * n_blocks + 5)) {
 	    *info = -6;
 	} else if (*lwork < f2cmax(1, *n * col_blk)) {
 	    *info = -8;
 	}
     }

     if (*info != 0) {
 	arg_pos = -(*info);
 	xerbla_("SGEQR", &arg_pos, (ftnlen)5);
 	return 0;
     }

 /*     Report sizes and block parameters in T(1:3) and WORK(1) */

     if (want_min_t) {
 	t[0] = (real) min_tsize;
     } else {
 	t[0] = (real) (col_blk * *n * n_blocks + 5);
     }
     t[1] = (real) row_blk;
     t[2] = (real) col_blk;
     if (want_min_w) {
 	work[0] = (real) f2cmax(1,*n);
     } else {
 	work[0] = (real) f2cmax(1, col_blk * *n);
     }

     if (is_query) {
 	return 0;
     }

 /*     Quick return if possible */

     if (f2cmin(*m,*n) == 0) {
 	return 0;
     }

 /*     The QR Decomposition */

     if (*m <= *n || row_blk <= *n || row_blk >= *m) {
 	sgeqrt_(m, n, &col_blk, a, lda, t + 5, &col_blk, work, info);
     } else {
 	slatsqr_(m, n, &row_blk, &col_blk, a, lda, t + 5, &col_blk, work,
 		lwork, info);
     }

     work[0] = (real) f2cmax(1, col_blk * *n);

     return 0;

 /*     End of SGEQR */

 } /* sgeqr_ */

--- a/lapack-netlib/SRC/sgeqr2.c
+++ b/lapack-netlib/SRC/sgeqr2.c
@@ -0,0 +1,602 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* R = A**B for doublecomplex operands passed by pointer. Both operands go
    through Cd(): cpow() takes native complex values, not the f2c struct. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Return x raised to the integer power n, computed by binary exponentiation.
    A negative n yields (1/x) raised to |n|; n == 0 yields 1. */
 static float spow_ui(float x, integer n) {
 	float acc = 1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* Take |n| in unsigned arithmetic: -n overflows (undefined
 			   behaviour) when n is the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) acc *= x;
 			if(u >>= 1) x *= x;	/* square only while exponent bits remain */
 			else break;
 		}
 	}
 	return acc;
 }
 /* Return x raised to the integer power n, computed by binary exponentiation.
    A negative n yields (1/x) raised to |n|; n == 0 yields 1. */
 static double dpow_ui(double x, integer n) {
 	double acc = 1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* Take |n| in unsigned arithmetic: -n overflows (undefined
 			   behaviour) when n is the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) acc *= x;
 			if(u >>= 1) x *= x;	/* square only while exponent bits remain */
 			else break;
 		}
 	}
 	return acc;
 }
 /* Return the complex value x raised to the integer power n, computed by
    binary exponentiation. A negative n yields (1/x) raised to |n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float acc = 1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* Take |n| in unsigned arithmetic: -n overflows (undefined
 			   behaviour) when n is the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) acc *= x;
 			if(u >>= 1) x *= x;	/* square only while exponent bits remain */
 			else break;
 		}
 	}
 	return acc;
 }
 /* Return the complex value x raised to the integer power n, computed by
    binary exponentiation. A negative n yields (1/x) raised to |n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double acc = 1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* Take |n| in unsigned arithmetic: -n overflows (undefined
 			   behaviour) when n is the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) acc *= x;
 			if(u >>= 1) x *= x;	/* square only while exponent bits remain */
 			else break;
 		}
 	}
 	return acc;
 }
 /* Integer power x**n with truncating integer semantics for n < 0. */
 static integer pow_ii(integer x, integer n) {
 	integer result;
 	unsigned long int bits;
 	if (n <= 0) {
 		/* x**0 and 1**n are both 1. */
 		if (n == 0 || x == 1)
 			return 1;
 		/* For a negative exponent every base other than +-1 truncates to 0;
 		   a zero base evaluates 1/x, exactly as the original expression did. */
 		if (x != -1)
 			return x == 0 ? 1/x : 0;
 		/* (-1)**n only depends on the parity of |n|. */
 		n = -n;
 	}
 	bits = n;
 	result = 1;
 	for (;;) {
 		if (bits & 01)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return result;
 }
 /* Position (1-based, counted from index s) of the first largest entry among
    the 1-based elements w(s..e). The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position (1-based, counted from index s) of the first largest entry among
    the 1-based elements w(s..e). The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum of conj(x(i)) * y(i) over *n_ elements,
    read from x with stride *incx_ and from y with stride *incy_.
    Stores 0 in *z when *n_ <= 0. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* unit-stride fast path */
 		for (i=0;i<n;i++) {
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else if (n > 0) {
 		/* As in reference BLAS, a negative increment walks the vector
 		   backwards: start at its last logical element rather than
 		   indexing in front of the array. */
 		integer x0 = incx < 0 ? (1 - n) * incx : 0;
 		integer y0 = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotc += conjf(Cf(&x[x0 + i*incx])) * Cf(&y[y0 + i*incy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product: *z = sum of conj(x(i)) * y(i) over *n_ elements,
    read from x with stride *incx_ and from y with stride *incy_.
    Stores 0 in *z when *n_ <= 0. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* unit-stride fast path */
 		for (i=0;i<n;i++) {
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else if (n > 0) {
 		/* As in reference BLAS, a negative increment walks the vector
 		   backwards: start at its last logical element rather than
 		   indexing in front of the array. */
 		integer x0 = incx < 0 ? (1 - n) * incx : 0;
 		integer y0 = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotc += conj(Cd(&x[x0 + i*incx])) * Cd(&y[y0 + i*incy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated dot product: *z = sum of x(i) * y(i) over *n_ elements,
    read from x with stride *incx_ and from y with stride *incy_.
    Stores 0 in *z when *n_ <= 0. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotu = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* unit-stride fast path */
 		for (i=0;i<n;i++) {
 			zdotu += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else if (n > 0) {
 		/* As in reference BLAS, a negative increment walks the vector
 		   backwards: start at its last logical element rather than
 		   indexing in front of the array. */
 		integer x0 = incx < 0 ? (1 - n) * incx : 0;
 		integer y0 = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotu += Cf(&x[x0 + i*incx]) * Cf(&y[y0 + i*incy]);
 		}
 	}
 	pCf(z) = zdotu;
 }
 /* Unconjugated dot product in double precision:
  * *z = sum over k of x(k) * y(k), k = 1..n.
  * n_, incx_ and incy_ are read through their pointers. As in the reference
  * BLAS ZDOTU, a negative stride starts at offset (1-n)*inc so the vector is
  * traversed backwards inside the array rather than indexing before its first
  * element. For n <= 0 the result is zero. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotu = 0.0;
 	if (n > 0) {
 		/* Negative increments begin at the far end of the vector. */
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			zdotu += Cd(&x[ix]) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = zdotu;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b SGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked
 algorithm. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEQR2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqr2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqr2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqr2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEQR2( M, N, A, LDA, TAU, WORK, INFO ) */

 /*       INTEGER            INFO, LDA, M, N */
 /*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEQR2 computes a QR factorization of a real m-by-n matrix A: */
 /* > */
 /* >    A = Q * ( R ), */
 /* >            ( 0 ) */
 /* > */
 /* > where: */
 /* > */
 /* >    Q is a m-by-m orthogonal matrix; */
 /* >    R is an upper-triangular n-by-n matrix; */
 /* >    0 is a (m-n)-by-n zero matrix, if m > n. */
 /* > */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the m by n matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the min(m,n) by n upper trapezoidal matrix R (R is */
 /* >          upper triangular if m >= n); the elements below the diagonal, */
 /* >          with the array TAU, represent the orthogonal matrix Q as a */
 /* >          product of elementary reflectors (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is REAL array, dimension (min(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2019 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = min(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
 /* >  and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int sgeqr2_(integer *m, integer *n, real *a, integer *lda, 
 	real *tau, real *work, integer *info)
 {
     /* Unblocked QR factorization of the m-by-n column-major matrix A with */
     /* leading dimension *lda. For each column j = 1..min(m,n) a reflector */
     /* is generated with SLARFG and, when columns remain to its right, */
     /* applied to them from the left with SLARF. On return *info is 0, or */
     /* -i when the i-th argument failed validation (reported via XERBLA). */
     /* Always returns 0. */
     extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, 
 	    integer *, real *, real *, integer *, real *), xerbla_(
 	    char *, integer *, ftnlen), slarfg_(integer *, real *, real *, 
 	    integer *, real *);

     /* Column stride, sampled once on entry as in the translated original. */
     const integer ld = *lda;
     integer j, nrefl, argno, vlen, ncols, vrow;
     real *ajj;
     real saved;

     /* Validate M, N and LDA; only the first failing argument is recorded. */
     *info = 0;
     if (*m < 0) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
     }
     if (*info != 0) {
 	argno = -(*info);
 	xerbla_("SGEQR2", &argno, (ftnlen)6);
 	return 0;
     }

     nrefl = f2cmin(*m,*n);

     for (j = 1; j <= nrefl; ++j) {
 	/* A(j,j) in zero-based storage. */
 	ajj = &a[(j - 1) + (j - 1) * ld];

 	/* Generate elementary reflector H(j) to annihilate A(j+1:m,j). */
 	/* The vector start row is clamped to m so it stays inside column j. */
 	vlen = *m - j + 1;
 	vrow = j + 1;
 	slarfg_(&vlen, ajj, &a[(f2cmin(vrow,*m) - 1) + (j - 1) * ld], &c__1,
 		&tau[j - 1]);

 	if (j >= *n) {
 	    continue;
 	}

 	/* Apply H(j) to A(j:m,j+1:n) from the left. The diagonal entry is */
 	/* temporarily set to 1 so column j holds the full reflector vector. */
 	saved = *ajj;
 	*ajj = 1.f;
 	vlen = *m - j + 1;
 	ncols = *n - j;
 	slarf_("Left", &vlen, &ncols, ajj, &c__1, &tau[j - 1],
 		&a[(j - 1) + j * ld], lda, &work[0]);
 	*ajj = saved;
     }
     return 0;

 /*     End of SGEQR2 */

 } /* sgeqr2_ */

--- a/lapack-netlib/SRC/sgeqr2p.c
+++ b/lapack-netlib/SRC/sgeqr2p.c
@@ -0,0 +1,607 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* C types standing in for the Fortran scalar types used by translated code. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers stored as a pair: r is the real part, i the imaginary part. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd build a C99 complex value from the r and i fields of the struct. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd reinterpret the struct pointer as a pointer to a C99 complex. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf / pCd dereference that pointer, so they can be used as assignment targets. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Logical and one-byte integer types. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer types used by the I/O parameter records below and for the */
 /* string-length arguments of translated routines (ftnlen). */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* The records that follow are type definitions only; each groups the */
 /* parameters of one kind of I/O statement, as named in its comment. */

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Most string-valued fields are paired with a length field of a similar name. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* One storage slot able to hold any of the scalar types defined above. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of one variable: its name, address, dimensions and a type code. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Basic arithmetic helpers. abs, f2cmin and f2cmax are plain macros and */
 /* evaluate an argument more than once, so arguments must be free of side */
 /* effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit helpers: test, clear or set bit number b of a. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro replacements for Fortran intrinsic helpers. Macros taking pointer */
 /* arguments dereference them; the complex ones (c_*, z_*, *_cnjg, pow_zi, */
 /* pow_ci) store their result through the first argument and expand to a */
 /* braced statement, so they cannot be used inside an expression. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* Positive difference: a - b when a > b, otherwise 0. */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* Truncation toward zero. */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* The constant is log10(e), turning a natural log into a base-10 log. */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* Round to nearest, halves away from zero. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* Magnitude of a carrying the sign of b (b >= 0 counts as positive). */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Power helpers; the integer-exponent forms forward to the *pow_ui functions. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): the exponent is passed as *(B) while the base goes through */
 /* Cd(); if B is a doublecomplex pointer this presumably should be Cd(B) -- */
 /* confirm against a caller before relying on this macro. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* s_cat: concatenate *(np) source strings rpp[i] (lengths rnp[i]) into lpp, */
 /* writing at most llp characters and filling the remainder with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the shorter of the two lengths c and d. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copy up to min(C,D) characters, stopping early at a NUL in B; */
 /* the rest of the destination is left untouched. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* Both termination helpers ignore their arguments. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shorthands; the trailing semicolon is part of the expansion. */
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): the next three expand to a brace-enclosed expression with no */
 /* terminating semicolon, so they are only usable where a braced initializer */
 /* is accepted -- confirm intended use before calling them. */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning logical, with unspecified parameters. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Raise x to the integer power n by repeated squaring.
  * n == 0 yields 1. For negative n, x is inverted and |n| is used; |n| is
  * formed in unsigned arithmetic because negating the most negative integer
  * as a signed value would overflow. */
 static float spow_ui(float x, integer n) {
 	float result=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			/* Multiply in the current square when the low bit is set. */
 			if(u & 01) result *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return result;
 }
 /* Raise x to the integer power n by repeated squaring (double precision).
  * n == 0 yields 1. For negative n, x is inverted and |n| is used; |n| is
  * formed in unsigned arithmetic because negating the most negative integer
  * as a signed value would overflow. */
 static double dpow_ui(double x, integer n) {
 	double result=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			/* Multiply in the current square when the low bit is set. */
 			if(u & 01) result *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return result;
 }
 /* Raise the complex value x to the integer power n by repeated squaring.
  * n == 0 yields 1. For negative n, x is inverted and |n| is used; |n| is
  * formed in unsigned arithmetic because negating the most negative integer
  * as a signed value would overflow. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			/* Multiply in the current square when the low bit is set. */
 			if(u & 01) result *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return result;
 }
 /* Raise the double-precision complex value x to the integer power n by
  * repeated squaring. n == 0 yields 1. For negative n, x is inverted and |n|
  * is used; |n| is formed in unsigned arithmetic because negating the most
  * negative integer as a signed value would overflow. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			/* Multiply in the current square when the low bit is set. */
 			if(u & 01) result *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return result;
 }
 /* Integer power x**n with integer (truncating) results for n <= 0:
  *   n == 0 or x == 1        -> 1
  *   n < 0, x not 1 or -1    -> 0, except that x == 0 evaluates 1/x
  *                              (an integer division by zero)
  *   n < 0, x == -1          -> computed as (-1)**(-n)
  * Positive exponents use repeated squaring. */
 static integer pow_ii(integer x, integer n) {
 	integer result;
 	unsigned long int bits;
 	if (n <= 0) {
 		if (n == 0 || x == 1) {
 			return 1;
 		}
 		if (x != -1) {
 			return x == 0 ? 1/x : 0;
 		}
 		/* x == -1: only the parity of the exponent matters. */
 		n = -n;
 	}
 	bits = n;
 	result = 1;
 	for (;;) {
 		if (bits & 01) {
 			result *= x;
 		}
 		bits >>= 1;
 		if (bits == 0) {
 			break;
 		}
 		x *= x;
 	}
 	return result;
 }
 /* Position of the first largest value in w(s..e), counted from 1 at index s.
  * Indices s and e are 1-based and inclusive. The comparison is strict, so on
  * ties the earliest position wins. w[s-1] is read even when e < s, and the
  * argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double largest = w[s-1];
 	integer k = s + 1;
 	while (k <= e) {
 		if (w[k-1] > largest) {
 			best = k;
 			largest = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Single-precision counterpart of dmaxloc_: position of the first largest
  * value in w(s..e), counted from 1 at index s (1-based, inclusive bounds).
  * Strict comparison keeps the earliest maximum; n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float largest = w[s-1];
 	integer k = s + 1;
 	while (k <= e) {
 		if (w[k-1] > largest) {
 			best = k;
 			largest = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum over k of conj(x(k)) * y(k), k = 1..n.
  * n_, incx_ and incy_ are read through their pointers. x and y are walked
  * with strides incx and incy. As in the reference BLAS CDOTC, a negative
  * stride starts at offset (1-n)*inc so the vector is traversed backwards
  * inside the array rather than indexing before its first element.
  * For n <= 0 the result is zero. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float zdotc = 0.0;
 	if (n > 0) {
 		/* Negative increments begin at the far end of the vector. */
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product in double precision:
  * *z = sum over k of conj(x(k)) * y(k), k = 1..n.
  * n_, incx_ and incy_ are read through their pointers. As in the reference
  * BLAS ZDOTC, a negative stride starts at offset (1-n)*inc so the vector is
  * traversed backwards inside the array rather than indexing before its first
  * element. For n <= 0 the result is zero. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotc = 0.0;
 	if (n > 0) {
 		/* Negative increments begin at the far end of the vector. */
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated dot product: *z = sum over k of x(k) * y(k), k = 1..n.
  * n_, incx_ and incy_ are read through their pointers. As in the reference
  * BLAS CDOTU, a negative stride starts at offset (1-n)*inc so the vector is
  * traversed backwards inside the array rather than indexing before its first
  * element. For n <= 0 the result is zero. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float zdotu = 0.0;
 	if (n > 0) {
 		/* Negative increments begin at the far end of the vector. */
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			zdotu += Cf(&x[ix]) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = zdotu;
 }
 /* Unconjugated dot product in double precision:
  * *z = sum over k of x(k) * y(k), k = 1..n.
  * n_, incx_ and incy_ are read through their pointers. As in the reference
  * BLAS ZDOTU, a negative stride starts at offset (1-n)*inc so the vector is
  * traversed backwards inside the array rather than indexing before its first
  * element. For n <= 0 the result is zero. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double zdotu = 0.0;
 	if (n > 0) {
 		/* Negative increments begin at the far end of the vector. */
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			zdotu += Cd(&x[ix]) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = zdotu;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b SGEQR2P computes the QR factorization of a general rectangular matrix with non-negative
 diagonal elements using an unblocked algorithm. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEQR2P + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqr2p
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqr2p
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqr2p
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) */

 /*       INTEGER            INFO, LDA, M, N */
 /*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEQR2P computes a QR factorization of a real m-by-n matrix A: */
 /* > */
 /* >    A = Q * ( R ), */
 /* >            ( 0 ) */
 /* > */
 /* > where: */
 /* > */
 /* >    Q is a m-by-m orthogonal matrix; */
 /* >    R is an upper-triangular n-by-n matrix with nonnegative diagonal */
 /* >    entries; */
 /* >    0 is a (m-n)-by-n zero matrix, if m > n. */
 /* > */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the m by n matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the min(m,n) by n upper trapezoidal matrix R (R is */
 /* >          upper triangular if m >= n). The diagonal entries of R */
 /* >          are nonnegative; the elements below the diagonal, */
 /* >          with the array TAU, represent the orthogonal matrix Q as a */
 /* >          product of elementary reflectors (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is REAL array, dimension (min(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2019 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = min(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
 /* >  and tau in TAU(i). */
 /* > */
 /* > See Lapack Working Note 203 for details */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int sgeqr2p_(integer *m, integer *n, real *a, integer *lda, 
 	real *tau, real *work, integer *info)
 {
     /* Unblocked QR factorization of the m-by-n column-major matrix A with */
     /* leading dimension *lda. For each column j = 1..min(m,n) a reflector */
     /* is generated with SLARFGP and, when columns remain to its right, */
     /* applied to them from the left with SLARF. On return *info is 0, or */
     /* -i when the i-th argument failed validation (reported via XERBLA). */
     /* Always returns 0. */
     extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, 
 	    integer *, real *, real *, integer *, real *), xerbla_(
 	    char *, integer *, ftnlen);
     extern /* Subroutine */ int slarfgp_(integer *, real *, real *, integer *,
 	     real *);

     /* Column stride, sampled once on entry as in the translated original. */
     const integer ld = *lda;
     integer j, nrefl, argno, vlen, ncols, vrow;
     real *ajj;
     real saved;

     /* Validate M, N and LDA; only the first failing argument is recorded. */
     *info = 0;
     if (*m < 0) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
     }
     if (*info != 0) {
 	argno = -(*info);
 	xerbla_("SGEQR2P", &argno, (ftnlen)7);
 	return 0;
     }

     nrefl = f2cmin(*m,*n);

     for (j = 1; j <= nrefl; ++j) {
 	/* A(j,j) in zero-based storage. */
 	ajj = &a[(j - 1) + (j - 1) * ld];

 	/* Generate elementary reflector H(j) to annihilate A(j+1:m,j). */
 	/* The vector start row is clamped to m so it stays inside column j. */
 	vlen = *m - j + 1;
 	vrow = j + 1;
 	slarfgp_(&vlen, ajj, &a[(f2cmin(vrow,*m) - 1) + (j - 1) * ld], &c__1,
 		&tau[j - 1]);

 	if (j >= *n) {
 	    continue;
 	}

 	/* Apply H(j) to A(j:m,j+1:n) from the left. The diagonal entry is */
 	/* temporarily set to 1 so column j holds the full reflector vector. */
 	saved = *ajj;
 	*ajj = 1.f;
 	vlen = *m - j + 1;
 	ncols = *n - j;
 	slarf_("Left", &vlen, &ncols, ajj, &c__1, &tau[j - 1],
 		&a[(j - 1) + j * ld], lda, &work[0]);
 	*ajj = saved;
     }
     return 0;

 /*     End of SGEQR2P */

 } /* sgeqr2p_ */

--- a/lapack-netlib/SRC/sgeqrf.c
+++ b/lapack-netlib/SRC/sgeqrf.c
@@ -0,0 +1,700 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* C types standing in for the Fortran scalar types used by translated code. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers stored as a pair: r is the real part, i the imaginary part. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd build a C99 complex value from the r and i fields of the struct. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd reinterpret the struct pointer as a pointer to a C99 complex. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf / pCd dereference that pointer, so they can be used as assignment targets. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Logical and one-byte integer types. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer types used by the I/O parameter records below and for the */
 /* string-length arguments of translated routines (ftnlen). */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* The records that follow are type definitions only; each groups the */
 /* parameters of one kind of I/O statement, as named in its comment. */

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Most string-valued fields are paired with a length field of a similar name. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* One storage slot able to hold any of the scalar types defined above. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of one variable: its name, address, dimensions and a type code. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Basic arithmetic helpers. abs, f2cmin and f2cmax are plain macros and */
 /* evaluate an argument more than once, so arguments must be free of side */
 /* effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit helpers: test, clear or set bit number b of a. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro replacements for Fortran intrinsic helpers. Macros taking pointer */
 /* arguments dereference them; the complex ones (c_*, z_*, *_cnjg, pow_zi, */
 /* pow_ci) store their result through the first argument and expand to a */
 /* braced statement, so they cannot be used inside an expression. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* Positive difference: a - b when a > b, otherwise 0. */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* Truncation toward zero. */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* The constant is log10(e), turning a natural log into a base-10 log. */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* Round to nearest, halves away from zero. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* Magnitude of a carrying the sign of b (b >= 0 counts as positive). */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Power helpers; the integer-exponent forms forward to the *pow_ui functions. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): the exponent is passed as *(B) while the base goes through */
 /* Cd(); if B is a doublecomplex pointer this presumably should be Cd(B) -- */
 /* confirm against a caller before relying on this macro. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* s_cat: concatenate *(np) source strings rpp[i] (lengths rnp[i]) into lpp, */
 /* writing at most llp characters and filling the remainder with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the shorter of the two lengths c and d. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copy up to min(C,D) characters, stopping early at a NUL in B; */
 /* the rest of the destination is left untouched. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* Both termination helpers ignore their arguments. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shorthands; the trailing semicolon is part of the expansion. */
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): the next three expand to a brace-enclosed expression with no */
 /* terminating semicolon, so they are only usable where a braced initializer */
 /* is accepted -- confirm intended use before calling them. */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning logical, with unspecified parameters. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Raise x to the integer power n by repeated squaring.
  * n == 0 yields 1. For negative n, x is inverted and |n| is used; |n| is
  * formed in unsigned arithmetic because negating the most negative integer
  * as a signed value would overflow. */
 static float spow_ui(float x, integer n) {
 	float result=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			/* Multiply in the current square when the low bit is set. */
 			if(u & 01) result *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return result;
 }
 /* Raise x to the integer power n by repeated squaring (double precision).
  * n == 0 yields 1. For negative n, x is inverted and |n| is used; |n| is
  * formed in unsigned arithmetic because negating the most negative integer
  * as a signed value would overflow. */
 static double dpow_ui(double x, integer n) {
 	double result=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			/* Multiply in the current square when the low bit is set. */
 			if(u & 01) result *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return result;
 }
 /* Raise the complex value x to the integer power n by repeated squaring.
  * n == 0 yields 1. For negative n, x is inverted and |n| is used; |n| is
  * formed in unsigned arithmetic because negating the most negative integer
  * as a signed value would overflow. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			/* Multiply in the current square when the low bit is set. */
 			if(u & 01) result *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return result;
 }
 /* Raise the double-precision complex value x to the integer power n by
  * repeated squaring. n == 0 yields 1. For negative n, x is inverted and |n|
  * is used; |n| is formed in unsigned arithmetic because negating the most
  * negative integer as a signed value would overflow. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			/* Multiply in the current square when the low bit is set. */
 			if(u & 01) result *= x;
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return result;
 }
 /* Integer power x**n with integer (truncating) results for n <= 0:
  *   n == 0 or x == 1        -> 1
  *   n < 0, x not 1 or -1    -> 0, except that x == 0 evaluates 1/x
  *                              (an integer division by zero)
  *   n < 0, x == -1          -> computed as (-1)**(-n)
  * Positive exponents use repeated squaring. */
 static integer pow_ii(integer x, integer n) {
 	integer result;
 	unsigned long int bits;
 	if (n <= 0) {
 		if (n == 0 || x == 1) {
 			return 1;
 		}
 		if (x != -1) {
 			return x == 0 ? 1/x : 0;
 		}
 		/* x == -1: only the parity of the exponent matters. */
 		n = -n;
 	}
 	bits = n;
 	result = 1;
 	for (;;) {
 		if (bits & 01) {
 			result *= x;
 		}
 		bits >>= 1;
 		if (bits == 0) {
 			break;
 		}
 		x *= x;
 	}
 	return result;
 }
 /* Position of the first largest entry among the 1-based elements
    w(s) .. w(e), returned relative to s (1 means w(s) itself).
    The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double best_val = w[s-1];
 	integer k;

 	for (k = s + 1; k <= e; ++k) {
 		/* Strict comparison keeps the earliest of equal maxima. */
 		if (w[k-1] > best_val) {
 			best = k;
 			best_val = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the first largest entry among the 1-based elements
    w(s) .. w(e), returned relative to s (1 means w(s) itself).
    The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float best_val = w[s-1];
 	integer k;

 	for (k = s + 1; k <= e; ++k) {
 		/* Strict comparison keeps the earliest of equal maxima. */
 		if (w[k-1] > best_val) {
 			best = k;
 			best_val = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* cdotc_: conjugated dot product of two single-precision complex vectors,
  *     *z = sum over i of conj(x_i) * y_i,   i = 1 .. *n_.
  * x and y are read with strides *incx_ and *incy_.  As in the reference
  * BLAS, a negative stride walks the vector backwards: the first element
  * used is then at offset (1-n)*inc from the pointer passed in, so x and y
  * always point at the start of their storage.  *z is 0 when *n_ <= 0.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + conjg(x(i)) * y(i) */
 			acc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		/* Start at the far end for negative strides so that every
 		   access stays inside the caller's array. */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* acc = acc + conjg(x(ix)) * y(iy) */
 			acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* zdotc_: conjugated dot product of two double-precision complex vectors,
  *     *z = sum over i of conj(x_i) * y_i,   i = 1 .. *n_.
  * x and y are read with strides *incx_ and *incy_.  As in the reference
  * BLAS, a negative stride walks the vector backwards: the first element
  * used is then at offset (1-n)*inc from the pointer passed in, so x and y
  * always point at the start of their storage.  *z is 0 when *n_ <= 0.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + dconjg(x(i)) * y(i) */
 			acc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		/* Start at the far end for negative strides so that every
 		   access stays inside the caller's array. */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* acc = acc + dconjg(x(ix)) * y(iy) */
 			acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = acc;
 }
 /* cdotu_: unconjugated dot product of two single-precision complex vectors,
  *     *z = sum over i of x_i * y_i,   i = 1 .. *n_.
  * x and y are read with strides *incx_ and *incy_.  As in the reference
  * BLAS, a negative stride walks the vector backwards: the first element
  * used is then at offset (1-n)*inc from the pointer passed in, so x and y
  * always point at the start of their storage.  *z is 0 when *n_ <= 0.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + x(i) * y(i) */
 			acc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		/* Start at the far end for negative strides so that every
 		   access stays inside the caller's array. */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* acc = acc + x(ix) * y(iy) */
 			acc += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* zdotu_: unconjugated dot product of two double-precision complex vectors,
  *     *z = sum over i of x_i * y_i,   i = 1 .. *n_.
  * x and y are read with strides *incx_ and *incy_.  As in the reference
  * BLAS, a negative stride walks the vector backwards: the first element
  * used is then at offset (1-n)*inc from the pointer passed in, so x and y
  * always point at the start of their storage.  *z is 0 when *n_ <= 0.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + x(i) * y(i) */
 			acc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		/* Start at the far end for negative strides so that every
 		   access stays inside the caller's array. */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* acc = acc + x(ix) * y(iy) */
 			acc += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* Integer literals given storage so their addresses can be passed to
    routines that take integer * arguments (used in the ilaenv_ calls
    below as selector and placeholder values). */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;

 /* > \brief \b SGEQRF */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEQRF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqrf.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqrf.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqrf.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */

 /*       INTEGER            INFO, LDA, LWORK, M, N */
 /*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEQRF computes a QR factorization of a real M-by-N matrix A: */
 /* > */
 /* >    A = Q * ( R ), */
 /* >            ( 0 ) */
 /* > */
 /* > where: */
 /* > */
 /* >    Q is a M-by-M orthogonal matrix; */
 /* >    R is an upper-triangular N-by-N matrix; */
 /* >    0 is a (M-N)-by-N zero matrix, if M > N. */
 /* > */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the f2cmin(M,N)-by-N upper trapezoidal matrix R (R is */
 /* >          upper triangular if m >= n); the elements below the diagonal, */
 /* >          with the array TAU, represent the orthogonal matrix Q as a */
 /* >          product of f2cmin(m,n) elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is REAL array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK.  LWORK >= f2cmax(1,N). */
 /* >          For optimum performance LWORK >= N*NB, where NB is */
 /* >          the optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2019 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
 /* >  and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* sgeqrf_: blocked QR factorization driver (f2c translation of SGEQRF).
  *
  * Checks the arguments, answers workspace queries (*lwork == -1), and then
  * factors A in column panels: each panel is factored by sgeqr2_, its block
  * reflector is formed by slarft_ and applied to the columns to its right by
  * slarfb_.  Whatever is left when the blocked loop stops (or the whole
  * matrix, if blocking is not used) is factored by one final sgeqr2_ call.
  *
  * a is addressed as a(i,j) = a[i + j * (*lda)] with 1-based i and j.
  * On return *info is 0, or -i when argument i was rejected (xerbla_ has
  * then been called).  work[0] receives a workspace size.  The function's
  * own return value is always 0.
  *
  * The size reported by the argument-check / workspace-query path is
  * clamped to at least 1, so that a query made with *n == 0 no longer
  * reports 0, a value this routine's own LWORK >= max(1,N) test rejects.
  */
 /* Subroutine */ int sgeqrf_(integer *m, integer *n, real *a, integer *lda, 
 	real *tau, real *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;

    /* Local variables */
    integer i__, k, nbmin, iinfo;
    extern /* Subroutine */ int sgeqr2_(integer *, integer *, real *, integer 
 	    *, real *, real *, integer *);
    integer ib, nb, nx;
    extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, real *, integer *, real *, 
 	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, 
 	    real *, integer *, real *, real *, integer *);
    integer ldwork, lwkopt;
    logical lquery;
    integer iws;


 /*  -- LAPACK computational routine (version 3.9.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2019 */


 /*  ===================================================================== */


 /*     Test the input arguments */

    /* Parameter adjustments: shift the pointers so that the 1-based */
    /* Fortran subscripts used below index the C arrays directly. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
    nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
 	    1);
 /*     Never report a workspace size below 1: LWORK = 0 is rejected by */
 /*     the check on argument 7 below. */
 /* Computing MAX */
    i__1 = 1, i__2 = *n * nb;
    lwkopt = f2cmax(i__1,i__2);
    work[1] = (real) lwkopt;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("SGEQRF", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    k = f2cmin(*m,*n);
    if (k == 0) {
 	work[1] = 1.f;
 	return 0;
    }

    nbmin = 2;
    nx = 0;
    iws = *n;
    if (nb > 1 && nb < k) {

 /*        Determine when to cross over from blocked to unblocked code. */

 /* Computing MAX */
 	i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", m, n, &c_n1, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);
 	if (nx < k) {

 /*           Determine if workspace is large enough for blocked code. */

 	    ldwork = *n;
 	    iws = ldwork * nb;
 	    if (*lwork < iws) {

 /*              Not enough workspace to use optimal NB:  reduce NB and */
 /*              determine the minimum value of NB. */

 		nb = *lwork / ldwork;
 /* Computing MAX */
 		i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", m, n, &c_n1, &
 			c_n1, (ftnlen)6, (ftnlen)1);
 		nbmin = f2cmax(i__1,i__2);
 	    }
 	}
    }

    if (nb >= nbmin && nb < k && nx < k) {

 /*        Use blocked code initially */

 	i__1 = k - nx;
 	i__2 = nb;
 	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
 /* Computing MIN */
 	    i__3 = k - i__ + 1;
 	    ib = f2cmin(i__3,nb);

 /*           Compute the QR factorization of the current block */
 /*           A(i:m,i:i+ib-1) */

 	    i__3 = *m - i__ + 1;
 	    sgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
 		    1], &iinfo);
 	    if (i__ + ib <= *n) {

 /*              Form the triangular factor of the block reflector */
 /*              H = H(i) H(i+1) . . . H(i+ib-1) */

 		i__3 = *m - i__ + 1;
 		slarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * 
 			a_dim1], lda, &tau[i__], &work[1], &ldwork);

 /*              Apply H**T to A(i:m,i+ib:n) from the left */

 		i__3 = *m - i__ + 1;
 		i__4 = *n - i__ - ib + 1;
 		slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
 			i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
 			ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib 
 			+ 1], &ldwork);
 	    }
 /* L10: */
 	}
    } else {
 	i__ = 1;
    }

 /*     Use unblocked code to factor the last or only block. */
 /*     (i__ is the first row/column not yet factored.) */

    if (i__ <= k) {
 	i__2 = *m - i__ + 1;
 	i__1 = *n - i__ + 1;
 	sgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
 		, &iinfo);
    }

    work[1] = (real) iws;
    return 0;

 /*     End of SGEQRF */

 } /* sgeqrf_ */

--- a/lapack-netlib/SRC/sgeqrfp.c
+++ b/lapack-netlib/SRC/sgeqrfp.c
@@ -0,0 +1,703 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar types used by the translated Fortran code. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers stored as a (real, imaginary) pair. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd: build a C99 complex value from the pair stored in *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd: view the struct pointer as a pointer to C99 complex. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf/pCd: lvalue of C99 complex type aliasing *z, used as an assignment target. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Values of the logical type. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer types used in the I/O control structures below; ftnlen is also
    the type of the string-length arguments in the routine prototypes. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* The structures below are control blocks for Fortran I/O statements.
    NOTE(review): none of them is referenced by the code visible in this
    file section; the field meanings are presumably those of the f2c
    runtime - confirm before relying on them. */

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Each string result is a char pointer paired with an integer length field. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* One storage cell that can hold a value of any of the scalar types. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Description of one namelist variable: a name, an address, a pointer to
    dimension data and an integer type code. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Generic arithmetic helpers.  These are macros, so an argument may be
    evaluated more than once; avoid arguments with side effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit b of a: test it, clear it, set it. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Intrinsic-function replacements.  Arguments are pointers, as in calls
    generated from Fortran; c_ and r_ forms take single-precision operands,
    z_ and d_ forms double-precision ones.  The brace-wrapped forms are
    statements that store their result through the first argument. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 /* r_cnjg goes through the double-precision conj and converts back on assignment. */
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* Positive difference: a - b when a > b, otherwise 0. */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* Truncation toward zero. */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* Base-10 logarithm as log(x) scaled by 0.43429448190325182765. */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* Round to nearest, halves away from zero (takes a value, not a pointer). */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* |a| carrying the sign of b; b == 0 counts as non-negative. */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Powers: the *_ui helpers they forward to are defined further down. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): pow_zz hands *(B) straight to cpow while every other
    complex helper converts with Cd(); confirm the type of B at call sites. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* Concatenate *np strings rpp[i] (lengths rnp[i]) into lpp, writing
    exactly llp characters: the input is cut off once llp is reached and
    any remainder is filled with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* Compare at most f2cmin(c,d) characters of a and b using strncmp. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* Copy at most f2cmin(C,D) characters from B into A, stopping early at a
    NUL in B.  Nothing is written past the copied characters. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* Both arguments are ignored; the process exits with status 1 / 0. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Library version string; not referenced by the code visible in this file section. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shorthands; each expansion already ends in a semicolon. */
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): the three macros below expand to a brace-enclosed
    expression with no terminating semicolon, so they do not look usable
    either as an expression or as a statement - confirm whether anything
    expands them. */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning logical.  The parameter list is
    left unspecified in C and declared variadic when compiled as C++. */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Single-precision x**n for integer n, by binary exponentiation.
    n == 0 yields 1; a negative n is handled by inverting x and negating n. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	while (1) {
 		/* Fold in the current power of x when its exponent bit is set. */
 		if (bits & 01)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return result;
 }
 /* Double-precision x**n for integer n, by binary exponentiation.
    n == 0 yields 1; a negative n is handled by inverting x and negating n. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	while (1) {
 		/* Fold in the current power of x when its exponent bit is set. */
 		if (bits & 01)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return result;
 }
 /* Single-precision complex x**n for integer n, by binary exponentiation.
    n == 0 yields 1; a negative n is handled by inverting x and negating n. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	while (1) {
 		/* Fold in the current power of x when its exponent bit is set. */
 		if (bits & 01)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return result;
 }
 /* Double-precision complex x**n for integer n, by binary exponentiation.
    n == 0 yields 1; a negative n is handled by inverting x and negating n. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	while (1) {
 		/* Fold in the current power of x when its exponent bit is set. */
 		if (bits & 01)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return result;
 }
 /* Integer x**n.
    For n <= 0: the result is 1 when n == 0 or x == 1; for any other x
    except -1 it is 0, with x == 0 evaluating 1/x (an integer division by
    zero, kept as in the original); x == -1 falls through with n negated.
    Otherwise the power is formed by binary exponentiation. */
 static integer pow_ii(integer x, integer n) {
 	integer result;
 	unsigned long int bits;

 	if (n <= 0) {
 		if (n == 0 || x == 1)
 			return 1;
 		if (x != -1)
 			return x == 0 ? 1/x : 0;
 		n = -n;
 	}
 	bits = n;
 	result = 1;
 	while (1) {
 		if (bits & 01)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return result;
 }
 /* Position of the first largest entry among the 1-based elements
    w(s) .. w(e), returned relative to s (1 means w(s) itself).
    The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double best_val = w[s-1];
 	integer k;

 	for (k = s + 1; k <= e; ++k) {
 		/* Strict comparison keeps the earliest of equal maxima. */
 		if (w[k-1] > best_val) {
 			best = k;
 			best_val = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the first largest entry among the 1-based elements
    w(s) .. w(e), returned relative to s (1 means w(s) itself).
    The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float best_val = w[s-1];
 	integer k;

 	for (k = s + 1; k <= e; ++k) {
 		/* Strict comparison keeps the earliest of equal maxima. */
 		if (w[k-1] > best_val) {
 			best = k;
 			best_val = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* cdotc_: conjugated dot product of two single-precision complex vectors,
  *     *z = sum over i of conj(x_i) * y_i,   i = 1 .. *n_.
  * x and y are read with strides *incx_ and *incy_.  As in the reference
  * BLAS, a negative stride walks the vector backwards: the first element
  * used is then at offset (1-n)*inc from the pointer passed in, so x and y
  * always point at the start of their storage.  *z is 0 when *n_ <= 0.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + conjg(x(i)) * y(i) */
 			acc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		/* Start at the far end for negative strides so that every
 		   access stays inside the caller's array. */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* acc = acc + conjg(x(ix)) * y(iy) */
 			acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* zdotc_: conjugated dot product of two double-precision complex vectors,
  *     *z = sum over i of conj(x_i) * y_i,   i = 1 .. *n_.
  * x and y are read with strides *incx_ and *incy_.  As in the reference
  * BLAS, a negative stride walks the vector backwards: the first element
  * used is then at offset (1-n)*inc from the pointer passed in, so x and y
  * always point at the start of their storage.  *z is 0 when *n_ <= 0.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + dconjg(x(i)) * y(i) */
 			acc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		/* Start at the far end for negative strides so that every
 		   access stays inside the caller's array. */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* acc = acc + dconjg(x(ix)) * y(iy) */
 			acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = acc;
 }
 /* cdotu_: unconjugated dot product of two single-precision complex vectors,
  *     *z = sum over i of x_i * y_i,   i = 1 .. *n_.
  * x and y are read with strides *incx_ and *incy_.  As in the reference
  * BLAS, a negative stride walks the vector backwards: the first element
  * used is then at offset (1-n)*inc from the pointer passed in, so x and y
  * always point at the start of their storage.  *z is 0 when *n_ <= 0.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + x(i) * y(i) */
 			acc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		/* Start at the far end for negative strides so that every
 		   access stays inside the caller's array. */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* acc = acc + x(ix) * y(iy) */
 			acc += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* zdotu_: unconjugated dot product of two double-precision complex vectors,
  *     *z = sum over i of x_i * y_i,   i = 1 .. *n_.
  * x and y are read with strides *incx_ and *incy_.  As in the reference
  * BLAS, a negative stride walks the vector backwards: the first element
  * used is then at offset (1-n)*inc from the pointer passed in, so x and y
  * always point at the start of their storage.  *z is 0 when *n_ <= 0.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* acc = acc + x(i) * y(i) */
 			acc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		/* Start at the far end for negative strides so that every
 		   access stays inside the caller's array. */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* acc = acc + x(ix) * y(iy) */
 			acc += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* Integer literals given storage so their addresses can be passed to
    routines that take integer * arguments (used in the ilaenv_ calls
    below as selector and placeholder values). */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;

 /* > \brief \b SGEQRFP */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEQRFP + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqrfp
 .f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqrfp
 .f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqrfp
 .f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */

 /*       INTEGER            INFO, LDA, LWORK, M, N */
 /*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEQRFP computes a QR factorization of a real M-by-N matrix A: */
 /* > */
 /* >    A = Q * ( R ), */
 /* >            ( 0 ) */
 /* > */
 /* > where: */
 /* > */
 /* >    Q is a M-by-M orthogonal matrix; */
 /* >    R is an upper-triangular N-by-N matrix with nonnegative diagonal */
 /* >    entries; */
 /* >    0 is a (M-N)-by-N zero matrix, if M > N. */
 /* > */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the f2cmin(M,N)-by-N upper trapezoidal matrix R (R is */
 /* >          upper triangular if m >= n). The diagonal entries of R */
 /* >          are nonnegative; the elements below the diagonal, */
 /* >          with the array TAU, represent the orthogonal matrix Q as a */
 /* >          product of f2cmin(m,n) elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is REAL array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK.  LWORK >= f2cmax(1,N). */
 /* >          For optimum performance LWORK >= N*NB, where NB is */
 /* >          the optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date November 2019 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
 /* >  and tau in TAU(i). */
 /* > */
 /* > See Lapack Working Note 203 for details */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* sgeqrfp_: blocked QR factorization driver (f2c translation of SGEQRFP).
  *
  * Checks the arguments, answers workspace queries (*lwork == -1), and then
  * factors A in column panels: each panel is factored by sgeqr2p_, its
  * block reflector is formed by slarft_ and applied to the columns to its
  * right by slarfb_.  Whatever is left when the blocked loop stops (or the
  * whole matrix, if blocking is not used) is factored by one final
  * sgeqr2p_ call.  Block-size parameters are looked up through ilaenv_
  * under the name "SGEQRF".
  *
  * a is addressed as a(i,j) = a[i + j * (*lda)] with 1-based i and j.
  * On return *info is 0, or -i when argument i was rejected (xerbla_ has
  * then been called).  work[0] receives a workspace size.  The function's
  * own return value is always 0.
  *
  * The size reported by the argument-check / workspace-query path is
  * clamped to at least 1, so that a query made with *n == 0 no longer
  * reports 0, a value this routine's own LWORK >= max(1,N) test rejects.
  */
 /* Subroutine */ int sgeqrfp_(integer *m, integer *n, real *a, integer *lda, 
 	real *tau, real *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;

    /* Local variables */
    integer i__, k, nbmin, iinfo, ib, nb, nx;
    extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, real *, integer *, real *, 
 	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, 
 	    real *, integer *, real *, real *, integer *);
    integer ldwork, lwkopt;
    logical lquery;
    extern /* Subroutine */ int sgeqr2p_(integer *, integer *, real *, 
 	    integer *, real *, real *, integer *);
    integer iws;


 /*  -- LAPACK computational routine (version 3.9.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     November 2019 */


 /*  ===================================================================== */


 /*     Test the input arguments */

    /* Parameter adjustments: shift the pointers so that the 1-based */
    /* Fortran subscripts used below index the C arrays directly. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
    nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
 	    1);
 /*     Never report a workspace size below 1: LWORK = 0 is rejected by */
 /*     the check on argument 7 below. */
 /* Computing MAX */
    i__1 = 1, i__2 = *n * nb;
    lwkopt = f2cmax(i__1,i__2);
    work[1] = (real) lwkopt;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
 	*info = -7;
    }
    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("SGEQRFP", &i__1, (ftnlen)7);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    k = f2cmin(*m,*n);
    if (k == 0) {
 	work[1] = 1.f;
 	return 0;
    }

    nbmin = 2;
    nx = 0;
    iws = *n;
    if (nb > 1 && nb < k) {

 /*        Determine when to cross over from blocked to unblocked code. */

 /* Computing MAX */
 	i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", m, n, &c_n1, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);
 	if (nx < k) {

 /*           Determine if workspace is large enough for blocked code. */

 	    ldwork = *n;
 	    iws = ldwork * nb;
 	    if (*lwork < iws) {

 /*              Not enough workspace to use optimal NB:  reduce NB and */
 /*              determine the minimum value of NB. */

 		nb = *lwork / ldwork;
 /* Computing MAX */
 		i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", m, n, &c_n1, &
 			c_n1, (ftnlen)6, (ftnlen)1);
 		nbmin = f2cmax(i__1,i__2);
 	    }
 	}
    }

    if (nb >= nbmin && nb < k && nx < k) {

 /*        Use blocked code initially */

 	i__1 = k - nx;
 	i__2 = nb;
 	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
 /* Computing MIN */
 	    i__3 = k - i__ + 1;
 	    ib = f2cmin(i__3,nb);

 /*           Compute the QR factorization of the current block */
 /*           A(i:m,i:i+ib-1) */

 	    i__3 = *m - i__ + 1;
 	    sgeqr2p_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &
 		    work[1], &iinfo);
 	    if (i__ + ib <= *n) {

 /*              Form the triangular factor of the block reflector */
 /*              H = H(i) H(i+1) . . . H(i+ib-1) */

 		i__3 = *m - i__ + 1;
 		slarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * 
 			a_dim1], lda, &tau[i__], &work[1], &ldwork);

 /*              Apply H**T to A(i:m,i+ib:n) from the left */

 		i__3 = *m - i__ + 1;
 		i__4 = *n - i__ - ib + 1;
 		slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
 			i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
 			ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib 
 			+ 1], &ldwork);
 	    }
 /* L10: */
 	}
    } else {
 	i__ = 1;
    }

 /*     Use unblocked code to factor the last or only block. */
 /*     (i__ is the first row/column not yet factored.) */

    if (i__ <= k) {
 	i__2 = *m - i__ + 1;
 	i__1 = *n - i__ + 1;
 	sgeqr2p_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
 		1], &iinfo);
    }

    work[1] = (real) iws;
    return 0;

 /*     End of SGEQRFP */

 } /* sgeqrfp_ */

--- a/lapack-netlib/SRC/sgeqrt.c
+++ b/lapack-netlib/SRC/sgeqrt.c
@@ -0,0 +1,628 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 /* <complex.h> is included above; drop its `complex` and `I` macros so the
    names can be used as ordinary identifiers (a struct type named `complex`
    is declared just below). */
 #if defined(complex)
 #undef complex
 #endif
 #if defined(I)
 #undef I
 #endif

 /* Scalar types used by the translated sources. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;

 /* Complex numbers kept as an explicit (r, i) pair of reals. */
 typedef struct {
 	real r;
 	real i;
 } complex;
 typedef struct {
 	doublereal r;
 	doublereal i;
 } doublecomplex;

 /* Cf / Cd: build the C99 complex value r + i*_Complex_I from a pair. */
 static inline _Complex float Cf(complex *z)
 {
 	_Complex float value = z->r + z->i*_Complex_I;
 	return value;
 }
 static inline _Complex double Cd(doublecomplex *z)
 {
 	_Complex double value = z->r + z->i*_Complex_I;
 	return value;
 }

 /* _pCf / _pCd: the address of a pair, cast to a pointer to the matching
    C99 complex type. */
 static inline _Complex float * _pCf(complex *z)
 {
 	return (_Complex float*)z;
 }
 static inline _Complex double * _pCd(doublecomplex *z)
 {
 	return (_Complex double*)z;
 }

 /* pCf / pCd: dereferenced form of the casts above, usable as an lvalue. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))

 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Values of the two logical constants. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #if !defined(Extern)
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer types used by the I/O control blocks declared below. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* Control block for external read, write. */
 typedef struct {
 	flag   cierr;
 	ftnint ciunit;
 	flag   ciend;
 	char  *cifmt;
 	ftnint cirec;
 } cilist;

 /* Control block for internal read, write. */
 typedef struct {
 	flag   icierr;
 	char  *iciunit;
 	flag   iciend;
 	char  *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /* Control block for open. */
 typedef struct {
 	flag   oerr;
 	ftnint ounit;
 	char  *ofnm;
 	ftnlen ofnmlen;
 	char  *osta;
 	char  *oacc;
 	char  *ofm;
 	ftnint orl;
 	char  *oblnk;
 } olist;

 /* Control block for close. */
 typedef struct {
 	flag   cerr;
 	ftnint cunit;
 	char  *csta;
 } cllist;

 /* Control block for rewind, backspace, endfile. */
 typedef struct {
 	flag   aerr;
 	ftnint aunit;
 } alist;

 /* Control block for inquire; members from inex onward are kept in the
    standard's parameter order. */
 typedef struct {
 	flag    inerr;
 	ftnint  inunit;
 	char   *infile;
 	ftnlen  infilen;
 	ftnint *inex;
 	ftnint *inopen;
 	ftnint *innum;
 	ftnint *innamed;
 	char   *inname;
 	ftnlen  innamlen;
 	char   *inacc;
 	ftnlen  inacclen;
 	char   *inseq;
 	ftnlen  inseqlen;
 	char   *indir;
 	ftnlen  indirlen;
 	char   *infmt;
 	ftnlen  infmtlen;
 	char   *inform;
 	ftnint  informlen;
 	char   *inunf;
 	ftnlen  inunflen;
 	ftnint *inrecl;
 	ftnint *innrec;
 	char   *inblank;
 	ftnlen  inblanklen;
 } inlist;

 #define VOID void

 /* One storage cell overlaying each basic type (for multiple entry points). */
 typedef union Multitype {
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 } Multitype;

 /* Descriptor of one variable, for Namelist. */
 typedef struct Vardesc {
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 } Vardesc;

 /* A named group of nvars variable descriptors. */
 typedef struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 } Namelist;

 /* Generic arithmetic helpers.  These are plain textual macros: an argument
    that appears more than once in an expansion is evaluated more than once,
    so arguments with side effects must be avoided. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit helpers: test, clear or set bit number b of a. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro versions of the run-time intrinsics.  Unless noted otherwise the
    arguments are POINTERS to the operands.  The brace-wrapped forms are
    statements that store their result through the first argument; the
    parenthesised forms are expressions. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }

 /* Complex helpers, built on Cf/Cd (read a value) and pCf/pCd (store one). */
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}

 /* Real helpers. */
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* positive difference: a - b when a > b, otherwise 0 */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* truncation toward zero */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* base-10 logarithm as a constant multiple of the natural logarithm */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* u_nint / u_sign take VALUES, not pointers; the d_/r_/i_ wrappers
    dereference their arguments first. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* magnitude of a carrying the sign of b */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))

 /* Integer helpers. */
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))

 /* Powers.  The *_ui helpers are defined further down in this header. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* The exponent is a doublecomplex pointer like the base, so it has to be
    converted with Cd(); dereferencing it would hand a struct to cpow(). */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 /* s_cat: copy the np source strings rpp[i] (lengths rnp[i]) one after the
    other into lpp, never writing more than llp characters in total, then fill
    whatever is left of the llp characters with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the shorter of the two lengths c and d. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copy B into A, stopping at the shorter of the lengths C and D or
    at the first NUL in B; nothing is appended after the copied characters. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* Both arguments are ignored: sig_die exits with status 1, s_stop with 0. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Version tag string; not referenced by anything visible in this header. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 /* Double-complex helpers; arguments are pointers to doublecomplex. */
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shorthands: each expands to a complete statement. */
 #define myexit_() break;
 #define mycycle() continue;
 /* Value macros.  They expand to parenthesised expressions so they can be
    used in assignments and larger expressions as well as in initializers
    (a brace-wrapped expansion is only accepted as an initializer).  The
    argument of myhuge is ignored. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 /* s and e are pointers and are dereferenced before the call to dmaxloc_. */
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning logical.  The parameter list is
    left open in both variants: `(...)` when compiled as C++, an empty
    (unprototyped) list when compiled as C. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /*
  * spow_ui -- x raised to the integer power n, by repeated squaring.
  *
  * n == 0 returns 1.  A negative n returns the |n|-th power of 1/x.
  * The magnitude of n is formed in unsigned arithmetic, so the most
  * negative integer is handled without signed overflow.
  */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits = (unsigned long int)n;
 	if (n != 0) {
 		if (n < 0) {
 			bits = 0UL - bits;	/* |n| without negating a signed value */
 			x = 1/x;
 		}
 		for ( ; ; ) {
 			if (bits & 01) result *= x;	/* low bit set: fold this square in */
 			if (bits >>= 1) x *= x;		/* more bits left: next square */
 			else break;
 		}
 	}
 	return result;
 }
 /*
  * dpow_ui -- x raised to the integer power n, by repeated squaring.
  *
  * n == 0 returns 1.  A negative n returns the |n|-th power of 1/x.
  * The magnitude of n is formed in unsigned arithmetic, so the most
  * negative integer is handled without signed overflow.
  */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits = (unsigned long int)n;
 	if (n != 0) {
 		if (n < 0) {
 			bits = 0UL - bits;	/* |n| without negating a signed value */
 			x = 1/x;
 		}
 		for ( ; ; ) {
 			if (bits & 01) result *= x;	/* low bit set: fold this square in */
 			if (bits >>= 1) x *= x;		/* more bits left: next square */
 			else break;
 		}
 	}
 	return result;
 }
 /*
  * cpow_ui -- complex x raised to the integer power n, by repeated squaring.
  *
  * n == 0 returns 1.  A negative n returns the |n|-th power of 1/x.
  * The magnitude of n is formed in unsigned arithmetic, so the most
  * negative integer is handled without signed overflow.
  */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits = (unsigned long int)n;
 	if (n != 0) {
 		if (n < 0) {
 			bits = 0UL - bits;	/* |n| without negating a signed value */
 			x = 1/x;
 		}
 		for ( ; ; ) {
 			if (bits & 01) result *= x;	/* low bit set: fold this square in */
 			if (bits >>= 1) x *= x;		/* more bits left: next square */
 			else break;
 		}
 	}
 	return result;
 }
 /*
  * zpow_ui -- double-complex x raised to the integer power n, by repeated
  * squaring.
  *
  * n == 0 returns 1.  A negative n returns the |n|-th power of 1/x.
  * The magnitude of n is formed in unsigned arithmetic, so the most
  * negative integer is handled without signed overflow.
  */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits = (unsigned long int)n;
 	if (n != 0) {
 		if (n < 0) {
 			bits = 0UL - bits;	/* |n| without negating a signed value */
 			x = 1/x;
 		}
 		for ( ; ; ) {
 			if (bits & 01) result *= x;	/* low bit set: fold this square in */
 			if (bits >>= 1) x *= x;		/* more bits left: next square */
 			else break;
 		}
 	}
 	return result;
 }
 /*
  * pow_ii -- integer x raised to the integer power n.
  *
  *   n == 0 or x == 1      -> 1
  *   n <  0 and x == -1    -> (-1)**|n|
  *   n <  0, any other x   -> 0 (the quotient truncates), except x == 0
  *   n >  0                -> x**n by repeated squaring
  *
  * The magnitude of a negative n is formed in unsigned arithmetic, so the
  * most negative integer is handled without signed overflow.
  */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int bits = (unsigned long int)n;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		/* NOTE(review): x == 0 with n < 0 evaluates 1/x, an integer division
 		   by zero.  The expression is kept exactly as it was; it looks
 		   deliberate (zero to a negative power has no integer value) --
 		   confirm before changing it. */
 		if (x != -1) return x == 0 ? 1/x : 0;
 		bits = 0UL - bits;	/* x == -1: continue with |n| */
 	}
 	for ( ; ; ) {
 		if (bits & 01) result *= x;	/* low bit set: fold this square in */
 		if (bits >>= 1) x *= x;		/* more bits left: next square */
 		else break;
 	}
 	return result;
 }
 /*
  * dmaxloc_ -- position of the largest value among w[s-1] .. w[e-1].
  *
  * The result is 1-based and counted from s, so the first candidate is
  * position 1.  On ties the earliest position wins (the comparison is
  * strict).  The parameter n is not used.
  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double peak = w[s - 1];
 	integer pos = s + 1;
 	while (pos <= e) {
 		if (w[pos - 1] > peak) {
 			best = pos;
 			peak = w[pos - 1];
 		}
 		++pos;
 	}
 	return best - s + 1;
 }
 /*
  * smaxloc_ -- position of the largest value among w[s-1] .. w[e-1].
  *
  * The result is 1-based and counted from s, so the first candidate is
  * position 1.  On ties the earliest position wins (the comparison is
  * strict).  The parameter n is not used.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float peak = w[s - 1];
 	integer pos = s + 1;
 	while (pos <= e) {
 		if (w[pos - 1] > peak) {
 			best = pos;
 			peak = w[pos - 1];
 		}
 		++pos;
 	}
 	return best - s + 1;
 }
 /*
  * cdotc_ -- conjugated dot product: *z = sum of conj(x_k) * y_k over n
  * elements, with x read at stride *incx_ and y at stride *incy_.
  *
  * A vector with a negative stride is traversed starting from index
  * (1 - n) * stride, so every access stays inside the n strided elements
  * that begin at the pointer passed in.  n <= 0 stores 0.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(ix)) * y(iy) */
 			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /*
  * zdotc_ -- conjugated dot product: *z = sum of conj(x_k) * y_k over n
  * elements, with x read at stride *incx_ and y at stride *incy_.
  *
  * A vector with a negative stride is traversed starting from index
  * (1 - n) * stride, so every access stays inside the n strided elements
  * that begin at the pointer passed in.  n <= 0 stores 0.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(ix)) * y(iy) */
 			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }
 /*
  * cdotu_ -- unconjugated dot product: *z = sum of x_k * y_k over n
  * elements, with x read at stride *incx_ and y at stride *incy_.
  *
  * A vector with a negative stride is traversed starting from index
  * (1 - n) * stride, so every access stays inside the n strided elements
  * that begin at the pointer passed in.  n <= 0 stores 0.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex float zdotu = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotu = zdotu + x(i) * y(i) */
 			zdotu += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotu = zdotu + x(ix) * y(iy) */
 			zdotu += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotu;
 }
 /*
  * zdotu_ -- unconjugated dot product: *z = sum of x_k * y_k over n
  * elements, with x read at stride *incx_ and y at stride *incy_.
  *
  * A vector with a negative stride is traversed starting from index
  * (1 - n) * stride, so every access stays inside the n strided elements
  * that begin at the pointer passed in.  n <= 0 stores 0.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex double zdotu = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotu = zdotu + x(i) * y(i) */
 			zdotu += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotu = zdotu + x(ix) * y(iy) */
 			zdotu += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotu;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SGEQRT */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEQRT + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqrt.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqrt.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqrt.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) */

 /*       INTEGER INFO, LDA, LDT, M, N, NB */
 /*       REAL A( LDA, * ), T( LDT, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEQRT computes a blocked QR factorization of a real M-by-N matrix A */
 /* > using the compact WY representation of Q. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NB */
 /* > \verbatim */
 /* >          NB is INTEGER */
 /* >          The block size to be used in the blocked QR.  MIN(M,N) >= NB >= 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the min(M,N)-by-N upper trapezoidal matrix R (R is */
 /* >          upper triangular if M >= N); the elements below the diagonal */
 /* >          are the columns of V. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is REAL array, dimension (LDT,MIN(M,N)) */
 /* >          The upper triangular block reflectors stored in compact form */
 /* >          as a sequence of upper triangular blocks.  See below */
 /* >          for further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= NB. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (NB*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2017 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix V stores the elementary reflectors H(i) in the i-th column */
 /* >  below the diagonal. For example, if M=5 and N=3, the matrix V is */
 /* > */
 /* >               V = (  1       ) */
 /* >                   ( v1  1    ) */
 /* >                   ( v1 v2  1 ) */
 /* >                   ( v1 v2 v3 ) */
 /* >                   ( v1 v2 v3 ) */
 /* > */
 /* >  where the vi's represent the vectors which define H(i), which are returned */
 /* >  in the matrix A.  The 1's along the diagonal of V are not stored in A. */
 /* > */
 /* >  Let K=MIN(M,N).  The number of blocks is B = ceiling(K/NB), where each */
 /* >  block is of order NB except for the last block, which is of order */
 /* >  IB = K - (B-1)*NB.  For each of the B blocks, an upper triangular block */
 /* >  reflector factor is computed: T1, T2, ..., TB.  The NB-by-NB (and IB-by-IB */
 /* >  for the last block) T's are stored in the NB-by-K matrix T as */
 /* > */
 /* >               T = (T1 T2 ... TB). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /*
  * sgeqrt_ -- blocked QR factorization of the M-by-N matrix A.
  *
  * Panels of up to NB columns are factored one after the other; after each
  * panel the block reflector just computed is applied, transposed and from
  * the left, to the columns to the right of the panel.  Both A and T are
  * column-major with leading dimensions *lda and *ldt; the T factor of the
  * panel that starts at column i is written starting at column i of T.
  *
  * On an invalid argument *info is set to minus the argument's position,
  * xerbla_ is called with that position, and nothing else is done.
  * The function value is always 0.
  */
 /* Subroutine */ int sgeqrt_(integer *m, integer *n, integer *nb, real *a, 
 	integer *lda, real *t, integer *ldt, real *work, integer *info)
 {
     /* External routines */
     extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, real *, integer *, real *, 
 	    integer *, real *, integer *, real *, integer *);
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
     extern /* Subroutine */ int sgeqrt2_(integer *, integer *, real *, 
 	    integer *, real *, integer *, integer *);
     extern /* Subroutine */ int sgeqrt3_(integer *, integer *, real *, 
 	    integer *, real *, integer *, integer *);

     /* Leading dimensions, read once on entry */
     const integer a_ld = *lda;
     const integer t_ld = *ldt;

     integer kmin, step, col, width;
     integer rows, right_cols, work_ld, arg_pos, panel_info;
     real *panel, *tblock, *right;

     /* Argument checks, in argument order; the first failure wins */
     *info = 0;
     kmin = f2cmin(*m,*n);
     if (*m < 0) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*nb < 1 || (*nb > kmin && kmin > 0)) {
 	*info = -3;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -5;
     } else if (*ldt < *nb) {
 	*info = -7;
     }
     if (*info != 0) {
 	arg_pos = -(*info);
 	xerbla_("SGEQRT", &arg_pos, (ftnlen)6);
 	return 0;
     }

     /* Nothing to factor */
     if (kmin == 0) {
 	return 0;
     }

     /* Walk over the first kmin columns, one panel at a time.  The step is
        *nb, which the checks above guarantee to be at least 1. */
     step = *nb;
     for (col = 1; col <= kmin; col += step) {

 	/* The last panel may be narrower than NB */
 	width = f2cmin(kmin - col + 1,*nb);

 	/* A(col,col) and T(1,col), addressed zero-based */
 	panel = a + (col - 1) + (col - 1) * a_ld;
 	tblock = t + (col - 1) * t_ld;

 	/* Factor the panel A(col:M, col:col+width-1); the recursive kernel
 	   is the one selected, the alternative is kept as in the original */
 	rows = *m - col + 1;
 	if (TRUE_) {
 	    sgeqrt3_(&rows, &width, panel, lda, tblock, ldt, &panel_info);
 	} else {
 	    sgeqrt2_(&rows, &width, panel, lda, tblock, ldt, &panel_info);
 	}

 	/* Apply H**T from the left to A(col:M, col+width:N), if any
 	   columns remain to the right of the panel */
 	if (col + width <= *n) {
 	    rows = *m - col + 1;
 	    right_cols = *n - col - width + 1;
 	    work_ld = *n - col - width + 1;
 	    right = a + (col - 1) + (col + width - 1) * a_ld;
 	    slarfb_("L", "T", "F", "C", &rows, &right_cols, &width, panel, 
 		    lda, tblock, ldt, right, lda, work, &work_ld);
 	}
     }
     return 0;

 /*     End of SGEQRT */

 } /* sgeqrt_ */

--- a/lapack-netlib/SRC/sgeqrt2.c
+++ b/lapack-netlib/SRC/sgeqrt2.c
@@ -0,0 +1,645 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 /* <complex.h> is included above; drop its `complex` and `I` macros so the
    names can be used as ordinary identifiers (a struct type named `complex`
    is declared just below). */
 #if defined(complex)
 #undef complex
 #endif
 #if defined(I)
 #undef I
 #endif

 /* Scalar types used by the translated sources. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;

 /* Complex numbers kept as an explicit (r, i) pair of reals. */
 typedef struct {
 	real r;
 	real i;
 } complex;
 typedef struct {
 	doublereal r;
 	doublereal i;
 } doublecomplex;

 /* Cf / Cd: build the C99 complex value r + i*_Complex_I from a pair. */
 static inline _Complex float Cf(complex *z)
 {
 	_Complex float value = z->r + z->i*_Complex_I;
 	return value;
 }
 static inline _Complex double Cd(doublecomplex *z)
 {
 	_Complex double value = z->r + z->i*_Complex_I;
 	return value;
 }

 /* _pCf / _pCd: the address of a pair, cast to a pointer to the matching
    C99 complex type. */
 static inline _Complex float * _pCf(complex *z)
 {
 	return (_Complex float*)z;
 }
 static inline _Complex double * _pCd(doublecomplex *z)
 {
 	return (_Complex double*)z;
 }

 /* pCf / pCd: dereferenced form of the casts above, usable as an lvalue. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))

 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Values of the two logical constants. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #if !defined(Extern)
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer types used by the I/O control blocks declared below. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* Control block for external read, write. */
 typedef struct {
 	flag   cierr;
 	ftnint ciunit;
 	flag   ciend;
 	char  *cifmt;
 	ftnint cirec;
 } cilist;

 /* Control block for internal read, write. */
 typedef struct {
 	flag   icierr;
 	char  *iciunit;
 	flag   iciend;
 	char  *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /* Control block for open. */
 typedef struct {
 	flag   oerr;
 	ftnint ounit;
 	char  *ofnm;
 	ftnlen ofnmlen;
 	char  *osta;
 	char  *oacc;
 	char  *ofm;
 	ftnint orl;
 	char  *oblnk;
 } olist;

 /* Control block for close. */
 typedef struct {
 	flag   cerr;
 	ftnint cunit;
 	char  *csta;
 } cllist;

 /* Control block for rewind, backspace, endfile. */
 typedef struct {
 	flag   aerr;
 	ftnint aunit;
 } alist;

 /* Control block for inquire; members from inex onward are kept in the
    standard's parameter order. */
 typedef struct {
 	flag    inerr;
 	ftnint  inunit;
 	char   *infile;
 	ftnlen  infilen;
 	ftnint *inex;
 	ftnint *inopen;
 	ftnint *innum;
 	ftnint *innamed;
 	char   *inname;
 	ftnlen  innamlen;
 	char   *inacc;
 	ftnlen  inacclen;
 	char   *inseq;
 	ftnlen  inseqlen;
 	char   *indir;
 	ftnlen  indirlen;
 	char   *infmt;
 	ftnlen  infmtlen;
 	char   *inform;
 	ftnint  informlen;
 	char   *inunf;
 	ftnlen  inunflen;
 	ftnint *inrecl;
 	ftnint *innrec;
 	char   *inblank;
 	ftnlen  inblanklen;
 } inlist;

 #define VOID void

 /* One storage cell overlaying each basic type (for multiple entry points). */
 typedef union Multitype {
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 } Multitype;

 /* Descriptor of one variable, for Namelist. */
 typedef struct Vardesc {
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 } Vardesc;

 /* A named group of nvars variable descriptors. */
 typedef struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 } Namelist;

 /* Generic arithmetic helpers.  These are plain textual macros: an argument
    that appears more than once in an expansion is evaluated more than once,
    so arguments with side effects must be avoided. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit helpers: test, clear or set bit number b of a. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro versions of the run-time intrinsics.  Unless noted otherwise the
    arguments are POINTERS to the operands.  The brace-wrapped forms are
    statements that store their result through the first argument; the
    parenthesised forms are expressions. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }

 /* Complex helpers, built on Cf/Cd (read a value) and pCf/pCd (store one). */
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}

 /* Real helpers. */
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* positive difference: a - b when a > b, otherwise 0 */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* truncation toward zero */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* base-10 logarithm as a constant multiple of the natural logarithm */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* u_nint / u_sign take VALUES, not pointers; the d_/r_/i_ wrappers
    dereference their arguments first. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* magnitude of a carrying the sign of b */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))

 /* Integer helpers. */
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))

 /* Powers.  The *_ui helpers are defined further down in this header. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* The exponent is a doublecomplex pointer like the base, so it has to be
    converted with Cd(); dereferencing it would hand a struct to cpow(). */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 /* s_cat: copy the np source strings rpp[i] (lengths rnp[i]) one after the
    other into lpp, never writing more than llp characters in total, then fill
    whatever is left of the llp characters with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the shorter of the two lengths c and d. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copy B into A, stopping at the shorter of the lengths C and D or
    at the first NUL in B; nothing is appended after the copied characters. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* Both arguments are ignored: sig_die exits with status 1, s_stop with 0. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Version tag string; not referenced by anything visible in this header. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 /* Double-complex helpers; arguments are pointers to doublecomplex. */
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shorthands: each expands to a complete statement. */
 #define myexit_() break;
 #define mycycle() continue;
 /* Value macros.  They expand to parenthesised expressions so they can be
    used in assignments and larger expressions as well as in initializers
    (a brace-wrapped expansion is only accepted as an initializer).  The
    argument of myhuge is ignored. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 /* s and e are pointers and are dereferenced before the call to dmaxloc_. */
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning logical.  The parameter list is
    left open in both variants: `(...)` when compiled as C++, an empty
    (unprototyped) list when compiled as C. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /*
  * spow_ui -- x raised to the integer power n, by repeated squaring.
  *
  * n == 0 returns 1.  A negative n returns the |n|-th power of 1/x.
  * The magnitude of n is formed in unsigned arithmetic, so the most
  * negative integer is handled without signed overflow.
  */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits = (unsigned long int)n;
 	if (n != 0) {
 		if (n < 0) {
 			bits = 0UL - bits;	/* |n| without negating a signed value */
 			x = 1/x;
 		}
 		for ( ; ; ) {
 			if (bits & 01) result *= x;	/* low bit set: fold this square in */
 			if (bits >>= 1) x *= x;		/* more bits left: next square */
 			else break;
 		}
 	}
 	return result;
 }
 /*
  * dpow_ui -- x raised to the integer power n, by repeated squaring.
  *
  * n == 0 returns 1.  A negative n returns the |n|-th power of 1/x.
  * The magnitude of n is formed in unsigned arithmetic, so the most
  * negative integer is handled without signed overflow.
  */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits = (unsigned long int)n;
 	if (n != 0) {
 		if (n < 0) {
 			bits = 0UL - bits;	/* |n| without negating a signed value */
 			x = 1/x;
 		}
 		for ( ; ; ) {
 			if (bits & 01) result *= x;	/* low bit set: fold this square in */
 			if (bits >>= 1) x *= x;		/* more bits left: next square */
 			else break;
 		}
 	}
 	return result;
 }
 /*
  * cpow_ui -- complex x raised to the integer power n, by repeated squaring.
  *
  * n == 0 returns 1.  A negative n returns the |n|-th power of 1/x.
  * The magnitude of n is formed in unsigned arithmetic, so the most
  * negative integer is handled without signed overflow.
  */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits = (unsigned long int)n;
 	if (n != 0) {
 		if (n < 0) {
 			bits = 0UL - bits;	/* |n| without negating a signed value */
 			x = 1/x;
 		}
 		for ( ; ; ) {
 			if (bits & 01) result *= x;	/* low bit set: fold this square in */
 			if (bits >>= 1) x *= x;		/* more bits left: next square */
 			else break;
 		}
 	}
 	return result;
 }
 /*
  * zpow_ui -- double-complex x raised to the integer power n, by repeated
  * squaring.
  *
  * n == 0 returns 1.  A negative n returns the |n|-th power of 1/x.
  * The magnitude of n is formed in unsigned arithmetic, so the most
  * negative integer is handled without signed overflow.
  */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits = (unsigned long int)n;
 	if (n != 0) {
 		if (n < 0) {
 			bits = 0UL - bits;	/* |n| without negating a signed value */
 			x = 1/x;
 		}
 		for ( ; ; ) {
 			if (bits & 01) result *= x;	/* low bit set: fold this square in */
 			if (bits >>= 1) x *= x;		/* more bits left: next square */
 			else break;
 		}
 	}
 	return result;
 }
 /*
  * pow_ii -- integer x raised to the integer power n.
  *
  *   n == 0 or x == 1      -> 1
  *   n <  0 and x == -1    -> (-1)**|n|
  *   n <  0, any other x   -> 0 (the quotient truncates), except x == 0
  *   n >  0                -> x**n by repeated squaring
  *
  * The magnitude of a negative n is formed in unsigned arithmetic, so the
  * most negative integer is handled without signed overflow.
  */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int bits = (unsigned long int)n;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		/* NOTE(review): x == 0 with n < 0 evaluates 1/x, an integer division
 		   by zero.  The expression is kept exactly as it was; it looks
 		   deliberate (zero to a negative power has no integer value) --
 		   confirm before changing it. */
 		if (x != -1) return x == 0 ? 1/x : 0;
 		bits = 0UL - bits;	/* x == -1: continue with |n| */
 	}
 	for ( ; ; ) {
 		if (bits & 01) result *= x;	/* low bit set: fold this square in */
 		if (bits >>= 1) x *= x;		/* more bits left: next square */
 		else break;
 	}
 	return result;
 }
 /*
  * dmaxloc_ -- position of the largest value among w[s-1] .. w[e-1].
  *
  * The result is 1-based and counted from s, so the first candidate is
  * position 1.  On ties the earliest position wins (the comparison is
  * strict).  The parameter n is not used.
  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double peak = w[s - 1];
 	integer pos = s + 1;
 	while (pos <= e) {
 		if (w[pos - 1] > peak) {
 			best = pos;
 			peak = w[pos - 1];
 		}
 		++pos;
 	}
 	return best - s + 1;
 }
 /*
  * smaxloc_ -- position of the largest value among w[s-1] .. w[e-1].
  *
  * The result is 1-based and counted from s, so the first candidate is
  * position 1.  On ties the earliest position wins (the comparison is
  * strict).  The parameter n is not used.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float peak = w[s - 1];
 	integer pos = s + 1;
 	while (pos <= e) {
 		if (w[pos - 1] > peak) {
 			best = pos;
 			peak = w[pos - 1];
 		}
 		++pos;
 	}
 	return best - s + 1;
 }
 /*
  * cdotc_ -- conjugated dot product: *z = sum of conj(x_k) * y_k over n
  * elements, with x read at stride *incx_ and y at stride *incy_.
  *
  * A vector with a negative stride is traversed starting from index
  * (1 - n) * stride, so every access stays inside the n strided elements
  * that begin at the pointer passed in.  n <= 0 stores 0.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(ix)) * y(iy) */
 			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /*
  * zdotc_ -- conjugated dot product: *z = sum of conj(x_k) * y_k over n
  * elements, with x read at stride *incx_ and y at stride *incy_.
  *
  * A vector with a negative stride is traversed starting from index
  * (1 - n) * stride, so every access stays inside the n strided elements
  * that begin at the pointer passed in.  n <= 0 stores 0.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(ix)) * y(iy) */
 			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }
 /*
  * cdotu_ -- unconjugated dot product: *z = sum of x_k * y_k over n
  * elements, with x read at stride *incx_ and y at stride *incy_.
  *
  * A vector with a negative stride is traversed starting from index
  * (1 - n) * stride, so every access stays inside the n strided elements
  * that begin at the pointer passed in.  n <= 0 stores 0.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex float zdotu = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotu = zdotu + x(i) * y(i) */
 			zdotu += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotu = zdotu + x(ix) * y(iy) */
 			zdotu += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotu;
 }
 /*
  * zdotu_ -- unconjugated dot product: *z = sum of x_k * y_k over n
  * elements, with x read at stride *incx_ and y at stride *incy_.
  *
  * A vector with a negative stride is traversed starting from index
  * (1 - n) * stride, so every access stays inside the n strided elements
  * that begin at the pointer passed in.  n <= 0 stores 0.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex double zdotu = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotu = zdotu + x(i) * y(i) */
 			zdotu += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotu = zdotu + x(ix) * y(iy) */
 			zdotu += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotu;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* File-scope objects holding the literal values 1, 1.0f and 0.0f.
    NOTE(review): presumably they exist so the values can be passed by
    address to the routines called from sgeqrt2_; the uses lie outside this
    excerpt -- confirm against the function body. */
 static integer c__1 = 1;
 static real c_b5 = 1.f;
 static real c_b7 = 0.f;

 /* > \brief \b SGEQRT2 computes a QR factorization of a general real or complex matrix using the compact WY representation of Q. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEQRT2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqrt2.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqrt2.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqrt2.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGEQRT2( M, N, A, LDA, T, LDT, INFO ) */

 /*       INTEGER   INFO, LDA, LDT, M, N */
 /*       REAL   A( LDA, * ), T( LDT, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEQRT2 computes a QR factorization of a real M-by-N matrix A, */
 /* > using the compact WY representation of Q. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= N. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the real M-by-N matrix A.  On exit, the elements on and */
 /* >          above the diagonal contain the N-by-N upper triangular matrix R; the */
 /* >          elements below the diagonal are the columns of V.  See below for */
 /* >          further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is REAL array, dimension (LDT,N) */
 /* >          The N-by-N upper triangular factor of the block reflector. */
 /* >          The elements on and above the diagonal contain the block */
 /* >          reflector T; the elements below the diagonal are not used. */
 /* >          See below for further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix V stores the elementary reflectors H(i) in the i-th column */
 /* >  below the diagonal. For example, if M=5 and N=3, the matrix V is */
 /* > */
 /* >               V = (  1       ) */
 /* >                   ( v1  1    ) */
 /* >                   ( v1 v2  1 ) */
 /* >                   ( v1 v2 v3 ) */
 /* >                   ( v1 v2 v3 ) */
 /* > */
 /* >  where the vi's represent the vectors which define H(i), which are returned */
 /* >  in the matrix A.  The 1's along the diagonal of V are not stored in A.  The */
 /* >  block reflector H is then given by */
 /* > */
 /* >               H = I - V * T * V**T */
 /* > */
 /* >  where V**T is the transpose of V. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /*
  * SGEQRT2: unblocked QR factorization of the real M-by-N matrix A (M >= N)
  * that also forms the triangular factor T of the compact WY representation
  * H = I - V * T * V**T.
  *
  *   m, n  (in)      rows / columns of A; m >= n >= 0 is required
  *   a     (in/out)  column-major with leading dimension *lda; addressed
  *                   below as Fortran A(i,j) = a[i + j*a_dim1]
  *   lda   (in)      must be >= max(1, m)
  *   t     (out)     column-major with leading dimension *ldt; column 1 holds
  *                   the tau values until the second loop moves them onto the
  *                   diagonal, column n is used as scratch in the first loop
  *   ldt   (in)      must be >= max(1, n)
  *   info  (out)     0 on success, -i when the i-th argument is illegal
  *
  * The function value is always 0; failures are reported through *info and
  * xerbla_.  c__1, c_b5 and c_b7 are file-scope constants defined above this
  * routine (c_b5 / c_b7 are the sgemv_ scale factors; presumably 1 and 0 as in
  * the reference algorithm -- confirm against their definitions).
  */
 /* Subroutine */ int sgeqrt2_(integer *m, integer *n, real *a, integer *lda,
 	real *t, integer *ldt, integer *info)
 {
     extern /* Subroutine */ int sger_(integer *, integer *, real *, real *,
 	    integer *, real *, integer *, real *, integer *);
     extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *,
 	    real *, integer *, real *, integer *, real *, real *, integer *);
     extern /* Subroutine */ int strmv_(char *, char *, char *, integer *,
 	    real *, integer *, real *, integer *);
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
     extern /* Subroutine */ int slarfg_(integer *, real *, real *,
 	    integer *, real *);

     integer a_dim1, t_dim1;
     integer col, last, nrows, ncols, below, argpos;
     real saved_diag, neg_tau;

     /* Shift the base pointers so 1-based (row, column) indexing works. */
     a_dim1 = *lda;
     a -= 1 + a_dim1;
     t_dim1 = *ldt;
     t -= 1 + t_dim1;

     /* Validate the arguments. */
     *info = 0;
     if (*m < 0) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*m < *n) {
 	/* Documented contract is M >= N.  Without this check the T-building
 	   loop below would address rows of A beyond M. */
 	*info = -1;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
     } else if (*ldt < f2cmax(1,*n)) {
 	*info = -6;
     }
     if (*info != 0) {
 	argpos = -(*info);
 	xerbla_("SGEQRT2", &argpos, (ftnlen)7);
 	return 0;
     }

     /* Pass 1: generate the reflectors one column at a time and apply each
        to the trailing columns. */
     last = f2cmin(*m,*n);
     for (col = 1; col <= last; ++col) {

 	/* Reflector H(col) annihilating A(col+1:m, col); tau goes to T(col,1). */
 	nrows = *m - col + 1;
 	below = col + 1;
 	slarfg_(&nrows, &a[col + col * a_dim1],
 		&a[f2cmin(below,*m) + col * a_dim1], &c__1, &t[col + t_dim1]);

 	if (col < *n) {

 	    /* Temporarily put the implicit unit diagonal of V into A. */
 	    saved_diag = a[col + col * a_dim1];
 	    a[col + col * a_dim1] = 1.f;

 	    /* W := A(col:m, col+1:n)**T * A(col:m, col), stored in T(:, n). */
 	    nrows = *m - col + 1;
 	    ncols = *n - col;
 	    sgemv_("T", &nrows, &ncols, &c_b5, &a[col + (col + 1) * a_dim1],
 		    lda, &a[col + col * a_dim1], &c__1, &c_b7,
 		    &t[*n * t_dim1 + 1], &c__1);

 	    /* A(col:m, col+1:n) += -tau * A(col:m, col) * W**T. */
 	    neg_tau = -t[col + t_dim1];
 	    nrows = *m - col + 1;
 	    ncols = *n - col;
 	    sger_(&nrows, &ncols, &neg_tau, &a[col + col * a_dim1], &c__1,
 		    &t[*n * t_dim1 + 1], &c__1,
 		    &a[col + (col + 1) * a_dim1], lda);

 	    a[col + col * a_dim1] = saved_diag;
 	}
     }

     /* Pass 2: build T column by column from the stored reflectors. */
     last = *n;
     for (col = 2; col <= last; ++col) {
 	saved_diag = a[col + col * a_dim1];
 	a[col + col * a_dim1] = 1.f;

 	/* T(1:col-1, col) := -tau * A(col:m, 1:col-1)**T * A(col:m, col). */
 	neg_tau = -t[col + t_dim1];
 	nrows = *m - col + 1;
 	ncols = col - 1;
 	sgemv_("T", &nrows, &ncols, &neg_tau, &a[col + a_dim1], lda,
 		&a[col + col * a_dim1], &c__1, &c_b7, &t[col * t_dim1 + 1],
 		&c__1);
 	a[col + col * a_dim1] = saved_diag;

 	/* T(1:col-1, col) := T(1:col-1, 1:col-1) * T(1:col-1, col). */
 	ncols = col - 1;
 	strmv_("U", "N", "N", &ncols, &t[1 + t_dim1], ldt,
 		&t[col * t_dim1 + 1], &c__1);

 	/* Move tau from its parking slot in column 1 onto the diagonal. */
 	t[col + col * t_dim1] = t[col + t_dim1];
 	t[col + t_dim1] = 0.f;
     }

     return 0;
 } /* sgeqrt2_ */

--- a/lapack-netlib/SRC/sgeqrt3.c
+++ b/lapack-netlib/SRC/sgeqrt3.c
@@ -0,0 +1,678 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar types that the f2c-translated code uses in place of the Fortran
    data types. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers as plain structs: r is the real part, i the imaginary part. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd: build a C99 complex value from the struct fields. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd: reinterpret a struct pointer as a pointer to the C99 complex
    type; pCf / pCd dereference it so a complex value can be assigned into the
    struct. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Logical (boolean) types and one-byte integer/logical types. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Values used for the logical type. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 /* Defaults to plain extern unless the includer defined it beforehand. */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer types used by the I/O parameter blocks below; ftnlen is the type
    of their length fields (ofnmlen, infilen, ...). */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* Each struct below is a parameter block for the Fortran I/O statement named
    in its label.  NOTE(review): individual field meanings follow libf2c's
    f2c.h and are not established by this file -- consult it before relying
    on a field. */

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Most result slots are a pointer paired with a length field of the same
    stem (e.g. inname / innamlen). */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 /* Spelling of void used by translated code. */
 #define VOID void

 /* One storage slot that can hold a value of any of the scalar or complex
    types declared earlier in this header. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of one variable: its name, address, a pointer to dimension
    data and an integer type code.  NOTE(review): the encoding of dims and
    type is defined by libf2c, not by this file. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Generic arithmetic helpers.  They are function-like macros that can
    evaluate an argument more than once, so pass expressions without side
    effects. */
 /* abs is defined as a macro here, so later uses of abs(...) in this file
    expand to this type-generic form. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 /* Smaller / larger of two values; dmin / dmax are aliases. */
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Test, clear or set bit b of a; the clear/set masks are built as uinteger. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro stand-ins for the Fortran intrinsic support routines.  Arguments are
    pointers: real operands are read through '*', complex operands are
    converted with Cf / Cd and results are stored through pCf / pCd.  The
    brace-wrapped forms are statements, not expressions. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 /* Single-precision complex helpers.  Note that c_cos calls ccos (the double
    complex function) while the others call the f-suffixed variants. */
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 /* Real elementary functions on a pointed-to value. */
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 /* Complex conjugate; r_cnjg also goes through the double-precision conj. */
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* Positive difference: a - b when a > b, otherwise 0. */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 /* Imaginary part; r_imag also uses the double-precision cimag. */
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* Truncation toward zero: floor for positive values, -floor(-x) otherwise. */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* Base-10 logarithm computed as log10(e) * ln(x). */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* Round to nearest with halves away from zero; u_nint takes a value,
    d_nint a pointer. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* Magnitude of a carrying the sign of b (b >= 0 counts as positive). */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 /* Integer-valued variants; results of the nint forms are cast to integer. */
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 /* String length is simply the length argument; s is not examined. */
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Powers: pow_dd uses pow(); the integer-exponent forms forward to the
    *pow_ui helpers named in their expansions. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): the exponent B is dereferenced directly instead of being
    converted with Cd(); confirm what pointer type callers pass for B. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* s_cat: concatenate *np source strings (rpp[i], each limited to rnp[i]
    characters) into lpp, writing at most llp characters in total and filling
    whatever is left of the llp characters with spaces.  Expands to a block
    that declares its own locals i, nc, ll, f__rp and lp. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the shorter of the two lengths c and d; characters
    beyond that length are not compared. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copy at most min(C,D) characters from B to A, stopping early at a
    NUL in B; A is neither padded nor NUL-terminated by this macro. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* sig_die / s_stop ignore their arguments and terminate the process with
    exit status 1 and 0 respectively. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Embedded library version string. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 /* Double-precision complex modulus, exponential and square root. */
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shorthands; the trailing ';' is part of the expansion. */
 #define myexit_() break;
 #define mycycle() continue;
 /* Value-producing helpers.  They are parenthesised so that they expand to
    ordinary expressions; the earlier brace-wrapped forms ({ceil(w)} etc.)
    were accepted only as a declaration initialiser and were a syntax error
    in any other expression context. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 /* 1-based position of the largest element of w[*s-1 .. *e-1], via dmaxloc_. */
 #define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning logical.  The parameter list is left
    unspecified: (...) when compiled as C++, an empty list in C. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* spow_ui: x raised to the integer power n by binary exponentiation.
    n == 0 gives 1; for n < 0 the base is inverted and |n| is used. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic: the signed "n = -n" overflows
 		   (undefined behaviour) when n is the most negative integer. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;	/* this exponent bit is set */
 		if (bits >>= 1) x *= x;		/* square for the next bit */
 		else break;
 	}
 	return result;
 }
 /* dpow_ui: x raised to the integer power n by binary exponentiation.
    n == 0 gives 1; for n < 0 the base is inverted and |n| is used. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic: the signed "n = -n" overflows
 		   (undefined behaviour) when n is the most negative integer. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;	/* this exponent bit is set */
 		if (bits >>= 1) x *= x;		/* square for the next bit */
 		else break;
 	}
 	return result;
 }
 /* cpow_ui: complex x raised to the integer power n by binary exponentiation.
    n == 0 gives 1; for n < 0 the base is inverted and |n| is used. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic: the signed "n = -n" overflows
 		   (undefined behaviour) when n is the most negative integer. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;	/* this exponent bit is set */
 		if (bits >>= 1) x *= x;		/* square for the next bit */
 		else break;
 	}
 	return result;
 }
 /* zpow_ui: complex x raised to the integer power n by binary exponentiation.
    n == 0 gives 1; for n < 0 the base is inverted and |n| is used. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic: the signed "n = -n" overflows
 		   (undefined behaviour) when n is the most negative integer. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;	/* this exponent bit is set */
 		if (bits >>= 1) x *= x;		/* square for the next bit */
 		else break;
 	}
 	return result;
 }
 /* pow_ii: integer x raised to the integer power n, with integer results.
      n == 0, or x == 1            -> 1
      n < 0 and x == -1            -> +1 or -1 according to the parity of n
      n < 0 and any other non-zero -> 0 (the true value has magnitude < 1)
      n < 0 and x == 0             -> evaluates 1/x, exactly as before
                                      NOTE(review): presumably a deliberate
                                      divide-by-zero trap -- confirm
      n > 0                        -> binary exponentiation */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int bits;
 	if (n == 0 || (n < 0 && x == 1)) return 1;
 	if (n < 0) {
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* Form |n| in unsigned arithmetic: the signed "n = -n" overflows
 		   (undefined behaviour) when n is the most negative integer. */
 		bits = 0UL - (unsigned long int)n;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;	/* this exponent bit is set */
 		if (bits >>= 1) x *= x;		/* square for the next bit */
 		else break;
 	}
 	return result;
 }
 /* dmaxloc_: position of the largest value among w[s-1] .. w[e-1], counted
    from 1 at index s.  The first occurrence wins on ties; n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double peak = w[s - 1];
 	integer pos;
 	for (pos = s + 1; pos <= e; ++pos) {
 		if (w[pos - 1] > peak) {
 			best = pos;
 			peak = w[pos - 1];
 		}
 	}
 	return best - s + 1;
 }
 /* smaxloc_: position of the largest value among w[s-1] .. w[e-1], counted
    from 1 at index s.  The first occurrence wins on ties; n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float peak = w[s - 1];
 	integer pos;
 	for (pos = s + 1; pos <= e; ++pos) {
 		if (w[pos - 1] > peak) {
 			best = pos;
 			peak = w[pos - 1];
 		}
 	}
 	return best - s + 1;
 }
 /* cdotc_: conjugated dot product, *z = sum over k of conj(x_k) * y_k for
    *n_ elements taken from x with stride *incx_ and from y with stride
    *incy_.  *z is set to 0 when *n_ <= 0. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float acc = 0.0;
 	/* BLAS convention: with a negative increment the vector is traversed
 	   backwards, starting (1-n)*inc elements into the array.  Indexing
 	   from 0 with a negative stride would read before the array. */
 	ix = incx < 0 ? (1 - n) * incx : 0;
 	iy = incy < 0 ? (1 - n) * incy : 0;
 	for (i = 0; i < n; i++) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /* zdotc_: conjugated dot product, *z = sum over k of conj(x_k) * y_k for
    *n_ elements taken from x with stride *incx_ and from y with stride
    *incy_.  *z is set to 0 when *n_ <= 0. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double acc = 0.0;
 	/* BLAS convention: with a negative increment the vector is traversed
 	   backwards, starting (1-n)*inc elements into the array.  Indexing
 	   from 0 with a negative stride would read before the array. */
 	ix = incx < 0 ? (1 - n) * incx : 0;
 	iy = incy < 0 ? (1 - n) * incy : 0;
 	for (i = 0; i < n; i++) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 /* cdotu_: unconjugated dot product, *z = sum over k of x_k * y_k for *n_
    elements taken from x with stride *incx_ and from y with stride *incy_.
    *z is set to 0 when *n_ <= 0. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float acc = 0.0;
 	/* BLAS convention: with a negative increment the vector is traversed
 	   backwards, starting (1-n)*inc elements into the array.  Indexing
 	   from 0 with a negative stride would read before the array. */
 	ix = incx < 0 ? (1 - n) * incx : 0;
 	iy = incy < 0 ? (1 - n) * incy : 0;
 	for (i = 0; i < n; i++) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /* zdotu_: unconjugated dot product, *z = sum over k of x_k * y_k for *n_
    elements taken from x with stride *incx_ and from y with stride *incy_.
    *z is set to 0 when *n_ <= 0. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double acc = 0.0;
 	/* BLAS convention: with a negative increment the vector is traversed
 	   backwards, starting (1-n)*inc elements into the array.  Indexing
 	   from 0 with a negative stride would read before the array. */
 	ix = incx < 0 ? (1 - n) * incx : 0;
 	iy = incy < 0 ? (1 - n) * incy : 0;
 	for (i = 0; i < n; i++) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* Constants that sgeqrt3_ passes by address to the routines it calls:
    c__1 is handed to slarfg_, c_b8 (1) and c_b20 (-1) are the scale factors
    given to strmm_ and sgemm_. */
 static integer c__1 = 1;
 static real c_b8 = 1.f;
 static real c_b20 = -1.f;

 /* > \brief \b SGEQRT3 recursively computes a QR factorization of a general real or complex matrix using the compact WY representation of Q. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGEQRT3 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqrt3.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqrt3.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqrt3.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*        SUBROUTINE SGEQRT3( M, N, A, LDA, T, LDT, INFO ) */

 /*       INTEGER   INFO, LDA, M, N, LDT */
 /*       REAL   A( LDA, * ), T( LDT, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGEQRT3 recursively computes a QR factorization of a real M-by-N */
 /* > matrix A, using the compact WY representation of Q. */
 /* > */
 /* > Based on the algorithm of Elmroth and Gustavson, */
 /* > IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= N. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the real M-by-N matrix A.  On exit, the elements on and */
 /* >          above the diagonal contain the N-by-N upper triangular matrix R; the */
 /* >          elements below the diagonal are the columns of V.  See below for */
 /* >          further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is REAL array, dimension (LDT,N) */
 /* >          The N-by-N upper triangular factor of the block reflector. */
 /* >          The elements on and above the diagonal contain the block */
 /* >          reflector T; the elements below the diagonal are not used. */
 /* >          See below for further details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2016 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix V stores the elementary reflectors H(i) in the i-th column */
 /* >  below the diagonal. For example, if M=5 and N=3, the matrix V is */
 /* > */
 /* >               V = (  1       ) */
 /* >                   ( v1  1    ) */
 /* >                   ( v1 v2  1 ) */
 /* >                   ( v1 v2 v3 ) */
 /* >                   ( v1 v2 v3 ) */
 /* > */
 /* >  where the vi's represent the vectors which define H(i), which are returned */
 /* >  in the matrix A.  The 1's along the diagonal of V are not stored in A.  The */
 /* >  block reflector H is then given by */
 /* > */
 /* >               H = I - V * T * V**T */
 /* > */
 /* >  where V**T is the transpose of V. */
 /* > */
 /* >  For details of the algorithm, see Elmroth and Gustavson (cited above). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /*
  * SGEQRT3: recursive QR factorization of the real M-by-N matrix A (M >= N)
  * using the compact WY representation H = I - V * T * V**T.
  *
  *   m, n  (in)      rows / columns of A; m >= n >= 0 is required
  *   a     (in/out)  column-major with leading dimension *lda; addressed
  *                   below as Fortran A(i,j) = a[i + j*a_dim1]
  *   lda   (in)      must be >= max(1, m)
  *   t     (out)     column-major with leading dimension *ldt; receives T and
  *                   is also used as workspace (block T(1:n1, j1:n))
  *   ldt   (in)      must be >= max(1, n)
  *   info  (out)     0 on success, -i when the i-th argument is illegal
  *
  * The function value is always 0.  N == 1 is the base case (one Householder
  * reflector); otherwise the columns are split into n1 = N/2 and n2 = N - n1
  * and each half is factored by a recursive call.  N == 0 returns at once.
  */
 /* Subroutine */ int sgeqrt3_(integer *m, integer *n, real *a, integer *lda,
 	real *t, integer *ldt, integer *info)
 {
     extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
 	    integer *, real *, real *, integer *, real *, integer *, real *,
 	    real *, integer *);
     extern /* Subroutine */ int strmm_(char *, char *, char *, char *,
 	    integer *, integer *, real *, real *, integer *, real *,
 	    integer *);
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
     extern /* Subroutine */ int slarfg_(integer *, real *, real *,
 	    integer *, real *);

     integer a_dim1, t_dim1;
     integer row, col, n1, n2, j1, i1, count, sub_info;

     /* Shift the base pointers so 1-based (row, column) indexing works. */
     a_dim1 = *lda;
     a -= 1 + a_dim1;
     t_dim1 = *ldt;
     t -= 1 + t_dim1;

     /* Validate the arguments. */
     *info = 0;
     if (*n < 0) {
 	*info = -2;
     } else if (*m < *n) {
 	*info = -1;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
     } else if (*ldt < f2cmax(1,*n)) {
 	*info = -6;
     }
     if (*info != 0) {
 	count = -(*info);
 	xerbla_("SGEQRT3", &count, (ftnlen)7);
 	return 0;
     }

     /* Nothing to factor.  Without this return N == 0 would take the split
        path with n1 = n2 = 0 and recurse on N == 0 without ever stopping. */
     if (*n == 0) {
 	return 0;
     }

     /* Base case: a single column needs one Householder reflector. */
     if (*n == 1) {
 	slarfg_(m, &a[a_dim1 + 1], &a[f2cmin(2,*m) + a_dim1], &c__1,
 		&t[t_dim1 + 1]);
 	return 0;
     }

     /* Split the columns: 1..n1 on the left, j1..n on the right. */
     n1 = *n / 2;
     n2 = *n - n1;
     count = n1 + 1;
     j1 = f2cmin(count,*n);
     count = *n + 1;
     i1 = f2cmin(count,*m);

     /* A(1:M,1:N1) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H */
     sgeqrt3_(m, &n1, &a[1 + a_dim1], lda, &t[1 + t_dim1], ldt, &sub_info);

     /* A(1:M,J1:N) = Q1^H A(1:M,J1:N), using T(1:N1,J1:N) as workspace */
     for (col = 1; col <= n2; ++col) {
 	for (row = 1; row <= n1; ++row) {
 	    t[row + (col + n1) * t_dim1] = a[row + (col + n1) * a_dim1];
 	}
     }
     strmm_("L", "L", "T", "U", &n1, &n2, &c_b8, &a[1 + a_dim1], lda,
 	    &t[j1 * t_dim1 + 1], ldt);

     count = *m - n1;
     sgemm_("T", "N", &n1, &n2, &count, &c_b8, &a[j1 + a_dim1], lda,
 	    &a[j1 + j1 * a_dim1], lda, &c_b8, &t[j1 * t_dim1 + 1], ldt);

     strmm_("L", "U", "T", "N", &n1, &n2, &c_b8, &t[1 + t_dim1], ldt,
 	    &t[j1 * t_dim1 + 1], ldt);

     count = *m - n1;
     sgemm_("N", "N", &count, &n2, &n1, &c_b20, &a[j1 + a_dim1], lda,
 	    &t[j1 * t_dim1 + 1], ldt, &c_b8, &a[j1 + j1 * a_dim1], lda);

     strmm_("L", "L", "N", "U", &n1, &n2, &c_b8, &a[1 + a_dim1], lda,
 	    &t[j1 * t_dim1 + 1], ldt);

     for (col = 1; col <= n2; ++col) {
 	for (row = 1; row <= n1; ++row) {
 	    a[row + (col + n1) * a_dim1] -= t[row + (col + n1) * t_dim1];
 	}
     }

     /* A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H */
     count = *m - n1;
     sgeqrt3_(&count, &n2, &a[j1 + j1 * a_dim1], lda, &t[j1 + j1 * t_dim1],
 	    ldt, &sub_info);

     /* T3 = T(1:N1,J1:N) = -T1 Y1^H Y2 T2; start from the transpose of the
        block A(J1:N,1:N1). */
     for (row = 1; row <= n1; ++row) {
 	for (col = 1; col <= n2; ++col) {
 	    t[row + (col + n1) * t_dim1] = a[col + n1 + row * a_dim1];
 	}
     }

     strmm_("R", "L", "N", "U", &n1, &n2, &c_b8, &a[j1 + j1 * a_dim1], lda,
 	    &t[j1 * t_dim1 + 1], ldt);

     count = *m - *n;
     sgemm_("T", "N", &n1, &n2, &count, &c_b8, &a[i1 + a_dim1], lda,
 	    &a[i1 + j1 * a_dim1], lda, &c_b8, &t[j1 * t_dim1 + 1], ldt);

     strmm_("L", "U", "N", "N", &n1, &n2, &c_b20, &t[1 + t_dim1], ldt,
 	    &t[j1 * t_dim1 + 1], ldt);

     strmm_("R", "U", "N", "N", &n1, &n2, &c_b8, &t[j1 + j1 * t_dim1], ldt,
 	    &t[j1 * t_dim1 + 1], ldt);

     /* Y = (Y1,Y2); R = [ R1  A(1:N1,J1:N) ];  T = [T1 T3] */
     /*                  [  0        R2     ]       [ 0 T2] */

     return 0;

 /*     End of SGEQRT3 */

 } /* sgeqrt3_ */

--- a/lapack-netlib/SRC/sgerfs.c
+++ b/lapack-netlib/SRC/sgerfs.c
@@ -0,0 +1,879 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar types that the f2c-translated code uses in place of the Fortran
    data types. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers as plain structs: r is the real part, i the imaginary part. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd: build a C99 complex value from the struct fields. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd: reinterpret a struct pointer as a pointer to the C99 complex
    type; pCf / pCd dereference it so a complex value can be assigned into the
    struct. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Logical (boolean) types and one-byte integer/logical types. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Values used for the logical type. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 /* Defaults to plain extern unless the includer defined it beforehand. */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer types used by the I/O parameter blocks below; ftnlen is the type
    of their length fields (ofnmlen, infilen, ...). */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* Each struct below is a parameter block for the Fortran I/O statement named
    in its label.  NOTE(review): individual field meanings follow libf2c's
    f2c.h and are not established by this file -- consult it before relying
    on a field. */

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Most result slots are a pointer paired with a length field of the same
    stem (e.g. inname / innamlen). */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 /* Spelling of void used by translated code. */
 #define VOID void

 /* One storage slot that can hold a value of any of the scalar or complex
    types declared earlier in this header. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of one variable: its name, address, a pointer to dimension
    data and an integer type code.  NOTE(review): the encoding of dims and
    type is defined by libf2c, not by this file. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Generic arithmetic helpers.  They are function-like macros that can
    evaluate an argument more than once, so pass expressions without side
    effects. */
 /* abs is defined as a macro here, so later uses of abs(...) in this file
    expand to this type-generic form. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 /* Smaller / larger of two values; dmin / dmax are aliases. */
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Test, clear or set bit b of a; the clear/set masks are built as uinteger. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro stand-ins for the Fortran intrinsic support routines.  Arguments are
    pointers: real operands are read through '*', complex operands are
    converted with Cf / Cd and results are stored through pCf / pCd.  The
    brace-wrapped forms are statements, not expressions. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 /* Single-precision complex helpers.  Note that c_cos calls ccos (the double
    complex function) while the others call the f-suffixed variants. */
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 /* Real elementary functions on a pointed-to value. */
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 /* Complex conjugate; r_cnjg also goes through the double-precision conj. */
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* Positive difference: a - b when a > b, otherwise 0. */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 /* Imaginary part; r_imag also uses the double-precision cimag. */
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* Truncation toward zero: floor for positive values, -floor(-x) otherwise. */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* Base-10 logarithm computed as log10(e) * ln(x). */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* Round to nearest with halves away from zero; u_nint takes a value,
    d_nint a pointer. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* Magnitude of a carrying the sign of b (b >= 0 counts as positive). */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 /* Integer-valued variants; results of the nint forms are cast to integer. */
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 /* String length is simply the length argument; s is not examined. */
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Powers: pow_dd uses pow(); the integer-exponent forms forward to the
    *pow_ui helpers named in their expansions. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): the exponent B is dereferenced directly instead of being
    converted with Cd(); confirm what pointer type callers pass for B. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* s_cat: concatenate *np source strings (rpp[i], each limited to rnp[i]
    characters) into lpp, writing at most llp characters in total and filling
    whatever is left of the llp characters with spaces.  Expands to a block
    that declares its own locals i, nc, ll, f__rp and lp. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the shorter of the two lengths c and d; characters
    beyond that length are not compared. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copy at most min(C,D) characters from B to A, stopping early at a
    NUL in B; A is neither padded nor NUL-terminated by this macro. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* sig_die / s_stop ignore their arguments and terminate the process with
    exit status 1 and 0 respectively. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Embedded library version string. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 /* Double-precision complex modulus, exponential and square root. */
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shorthands; the trailing ';' is part of the expansion. */
 #define myexit_() break;
 #define mycycle() continue;
 /* Value-producing helpers.  They are parenthesised so that they expand to
    ordinary expressions; the earlier brace-wrapped forms ({ceil(w)} etc.)
    were accepted only as a declaration initialiser and were a syntax error
    in any other expression context. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 /* 1-based position of the largest element of w[*s-1 .. *e-1], via dmaxloc_. */
 #define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning logical.  The parameter list is left
    unspecified: (...) when compiled as C++, an empty list in C. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* spow_ui: x raised to the integer power n by binary exponentiation.
    n == 0 gives 1; for n < 0 the base is inverted and |n| is used. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic: the signed "n = -n" overflows
 		   (undefined behaviour) when n is the most negative integer. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;	/* this exponent bit is set */
 		if (bits >>= 1) x *= x;		/* square for the next bit */
 		else break;
 	}
 	return result;
 }
 /* dpow_ui: x raised to the integer power n by binary exponentiation.
    n == 0 gives 1; for n < 0 the base is inverted and |n| is used. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic: the signed "n = -n" overflows
 		   (undefined behaviour) when n is the most negative integer. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;	/* this exponent bit is set */
 		if (bits >>= 1) x *= x;		/* square for the next bit */
 		else break;
 	}
 	return result;
 }
 /* cpow_ui: complex x raised to the integer power n by binary exponentiation.
    n == 0 gives 1; for n < 0 the base is inverted and |n| is used. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic: the signed "n = -n" overflows
 		   (undefined behaviour) when n is the most negative integer. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;	/* this exponent bit is set */
 		if (bits >>= 1) x *= x;		/* square for the next bit */
 		else break;
 	}
 	return result;
 }
 /* zpow_ui: complex x raised to the integer power n by binary exponentiation.
    n == 0 gives 1; for n < 0 the base is inverted and |n| is used. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Form |n| in unsigned arithmetic: the signed "n = -n" overflows
 		   (undefined behaviour) when n is the most negative integer. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;	/* this exponent bit is set */
 		if (bits >>= 1) x *= x;		/* square for the next bit */
 		else break;
 	}
 	return result;
 }
 /* Integer power x**n for integer operands.
  * For n <= 0: the result is 1 when n == 0 or x == 1; when x is neither 1 nor
  * -1 the result is 0, except that x == 0 evaluates 1/x (an integer division
  * by zero); when x == -1 the exponent is negated and the multiply loop runs.
  * For n > 0 the result is built by binary exponentiation. */
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		/* NOTE(review): the 1/x for x == 0 below looks like a deliberate trap on 0**negative -- confirm */
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	/* Loop for positive n; the second clause reduces to (n != 0 && x == -1),
 	   i.e. the x == -1 branch above was taken. */
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;	/* current bit set: multiply in */
 			if(u >>= 1) x *= x;	/* more bits left: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Position of the first largest entry among w(s..e) (1-based bounds),
  * returned relative to s, so the answer is 1 when w(s) is the maximum.
  * The n argument is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer pos;
 	double top = w[s-1];
 	for (pos = s + 1; pos <= e; ++pos) {
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the first largest entry among w(s..e) (1-based bounds),
  * returned relative to s, so the answer is 1 when w(s) is the maximum.
  * The n argument is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer pos;
 	float top = w[s-1];
 	for (pos = s + 1; pos <= e; ++pos) {
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated single-precision complex dot product:
  *     *z = sum over i of conj(x_i) * y_i,   i = 0 .. *n_ - 1.
  * x and y are read with strides *incx_ and *incy_.  As in the reference BLAS,
  * a negative stride walks the vector backwards, starting at offset (1-n)*inc,
  * so no element before the start of the array is touched.  n <= 0 gives 0. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		/* first element visited for each vector */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++, ix += incx, iy += incy) { /* zdotc = zdotc + conjg(x(ix))* y(iy) */
 			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated double-precision complex dot product:
  *     *z = sum over i of conj(x_i) * y_i,   i = 0 .. *n_ - 1.
  * x and y are read with strides *incx_ and *incy_.  As in the reference BLAS,
  * a negative stride walks the vector backwards, starting at offset (1-n)*inc,
  * so no element before the start of the array is touched.  n <= 0 gives 0. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		/* first element visited for each vector */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++, ix += incx, iy += incy) { /* zdotc = zdotc + dconjg(x(ix))* y(iy) */
 			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated single-precision complex dot product:
  *     *z = sum over i of x_i * y_i,   i = 0 .. *n_ - 1.
  * x and y are read with strides *incx_ and *incy_.  As in the reference BLAS,
  * a negative stride walks the vector backwards, starting at offset (1-n)*inc,
  * so no element before the start of the array is touched.  n <= 0 gives 0. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		/* first element visited for each vector */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++, ix += incx, iy += incy) { /* zdotc = zdotc + x(ix)* y(iy) */
 			zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Unconjugated double-precision complex dot product:
  *     *z = sum over i of x_i * y_i,   i = 0 .. *n_ - 1.
  * x and y are read with strides *incx_ and *incy_.  As in the reference BLAS,
  * a negative stride walks the vector backwards, starting at offset (1-n)*inc,
  * so no element before the start of the array is touched.  n <= 0 gives 0. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		/* first element visited for each vector */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++, ix += incx, iy += incy) { /* zdotc = zdotc + x(ix)* y(iy) */
 			zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* Passed by address as the unit stride in scopy_/sgemv_/saxpy_ and as NRHS = 1 in sgetrs_ */
 static integer c__1 = 1;
 /* alpha = -1 in the sgemv_ call that forms the residual */
 static real c_b15 = -1.f;
 /* beta = 1 in that sgemv_ call, and alpha = 1 in the saxpy_ solution update */
 static real c_b17 = 1.f;

 /* > \brief \b SGERFS */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGERFS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgerfs.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgerfs.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgerfs.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, */
 /*                          X, LDX, FERR, BERR, WORK, IWORK, INFO ) */

 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS */
 /*       INTEGER            IPIV( * ), IWORK( * ) */
 /*       REAL               A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */
 /*      $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGERFS improves the computed solution to a system of linear */
 /* > equations and provides error bounds and backward error estimates for */
 /* > the solution. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          Specifies the form of the system of equations: */
 /* >          = 'N':  A * X = B     (No transpose) */
 /* >          = 'T':  A**T * X = B  (Transpose) */
 /* >          = 'C':  A**H * X = B  (Conjugate transpose = Transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrices B and X.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          The original N-by-N matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AF */
 /* > \verbatim */
 /* >          AF is REAL array, dimension (LDAF,N) */
 /* >          The factors L and U from the factorization A = P*L*U */
 /* >          as computed by SGETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAF */
 /* > \verbatim */
 /* >          LDAF is INTEGER */
 /* >          The leading dimension of the array AF.  LDAF >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices from SGETRF; for 1<=i<=N, row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,NRHS) */
 /* >          The right hand side matrix B. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] X */
 /* > \verbatim */
 /* >          X is REAL array, dimension (LDX,NRHS) */
 /* >          On entry, the solution matrix X, as computed by SGETRS. */
 /* >          On exit, the improved solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDX */
 /* > \verbatim */
 /* >          LDX is INTEGER */
 /* >          The leading dimension of the array X.  LDX >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] FERR */
 /* > \verbatim */
 /* >          FERR is REAL array, dimension (NRHS) */
 /* >          The estimated forward error bound for each solution vector */
 /* >          X(j) (the j-th column of the solution matrix X). */
 /* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
 /* >          is an estimated upper bound for the magnitude of the largest */
 /* >          element in (X(j) - XTRUE) divided by the magnitude of the */
 /* >          largest element in X(j).  The estimate is as reliable as */
 /* >          the estimate for RCOND, and is almost always a slight */
 /* >          overestimate of the true error. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] BERR */
 /* > \verbatim */
 /* >          BERR is REAL array, dimension (NRHS) */
 /* >          The componentwise relative backward error of each solution */
 /* >          vector X(j) (i.e., the smallest relative change in */
 /* >          any element of A or B that makes X(j) an exact solution). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (3*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /* > \par Internal Parameters: */
 /*  ========================= */
 /* > */
 /* > \verbatim */
 /* >  ITMAX is the maximum number of steps of iterative refinement. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int sgerfs_(char *trans, integer *n, integer *nrhs, real *a, 
 	integer *lda, real *af, integer *ldaf, integer *ipiv, real *b, 
 	integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *
 	work, integer *iwork, integer *info)
 {
     /* System generated locals */
     integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
 	    x_offset, i__1, i__2, i__3;
     real r__1, r__2, r__3;

     /* Local variables */
     integer kase;
     real safe1, safe2;
     integer i__, j, k;
     real s;
     extern logical lsame_(char *, char *);
     integer isave[3];
     extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
 	    real *, integer *, real *, integer *, real *, real *, integer *);
     integer count;
     extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
 	    integer *), saxpy_(integer *, real *, real *, integer *, real *, 
 	    integer *), slacn2_(integer *, real *, real *, integer *, real *, 
 	    integer *, integer *);
     real xk;
     extern real slamch_(char *);
     integer nz;
     real safmin;
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
     logical notran;
     extern /* Subroutine */ int sgetrs_(char *, integer *, integer *, real *, 
 	    integer *, integer *, real *, integer *, integer *);
     char transt[1];
     real lstres, eps;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

     /* Parameter adjustments */
     /* Each pointer is shifted so that the 1-based, column-major Fortran */
     /* subscripts used below (e.g. a[i + k * a_dim1]) address the caller's data. */
     a_dim1 = *lda;
     a_offset = 1 + a_dim1 * 1;
     a -= a_offset;
     af_dim1 = *ldaf;
     af_offset = 1 + af_dim1 * 1;
     af -= af_offset;
     --ipiv;
     b_dim1 = *ldb;
     b_offset = 1 + b_dim1 * 1;
     b -= b_offset;
     x_dim1 = *ldx;
     x_offset = 1 + x_dim1 * 1;
     x -= x_offset;
     --ferr;
     --berr;
     --work;
     --iwork;

     /* Function Body */
     /* INFO is set to minus the position of the first invalid argument. */
     *info = 0;
     notran = lsame_(trans, "N");
     if (! notran && ! lsame_(trans, "T") && ! lsame_(
 	    trans, "C")) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*nrhs < 0) {
 	*info = -3;
     } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
     } else if (*ldaf < f2cmax(1,*n)) {
 	*info = -7;
     } else if (*ldb < f2cmax(1,*n)) {
 	*info = -10;
     } else if (*ldx < f2cmax(1,*n)) {
 	*info = -12;
     }
     if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("SGERFS", &i__1, (ftnlen)6);
 	return 0;
     }

 /*     Quick return if possible */

     /* With nothing to solve, every error bound is reported as zero. */
     if (*n == 0 || *nrhs == 0) {
 	i__1 = *nrhs;
 	for (j = 1; j <= i__1; ++j) {
 	    ferr[j] = 0.f;
 	    berr[j] = 0.f;
 /* L10: */
 	}
 	return 0;
     }

     /* transt is the opposite of trans; it is used for the kase == 1 solve below. */
     if (notran) {
 	*(unsigned char *)transt = 'T';
     } else {
 	*(unsigned char *)transt = 'N';
     }

 /*     NZ = maximum number of nonzero elements in each row of A, plus 1 */

     nz = *n + 1;
     eps = slamch_("Epsilon");
     safmin = slamch_("Safe minimum");
     safe1 = nz * safmin;
     safe2 = safe1 / eps;

 /*     Do for each right hand side */

     i__1 = *nrhs;
     for (j = 1; j <= i__1; ++j) {

 	/* count tracks refinement steps; lstres holds the previous BERR(J) */
 	/* for the "decreased by a factor of 2" test. */
 	count = 1;
 	lstres = 3.f;
 L20:

 /*        Loop until stopping criterion is satisfied. */

 /*        Compute residual R = B - op(A) * X, */
 /*        where op(A) = A, A**T, or A**H, depending on TRANS. */

 	/* The residual is built in work[n+1 .. 2n]. */
 	scopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1);
 	sgemv_(trans, n, n, &c_b15, &a[a_offset], lda, &x[j * x_dim1 + 1], &
 		c__1, &c_b17, &work[*n + 1], &c__1);

 /*        Compute componentwise relative backward error from formula */

 /*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */

 /*        where abs(Z) is the componentwise absolute value of the matrix */
 /*        or vector Z.  If the i-th component of the denominator is less */
 /*        than SAFE2, then SAFE1 is added to the i-th components of the */
 /*        numerator and denominator before dividing. */

 	/* work[1 .. n] starts as abs(B(:,j)) and accumulates the denominator. */
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    work[i__] = (r__1 = b[i__ + j * b_dim1], abs(r__1));
 /* L30: */
 	}

 /*        Compute abs(op(A))*abs(X) + abs(B). */

 	if (notran) {
 	    /* Column-oriented: add abs(A(:,k)) * abs(X(k,j)) for each k. */
 	    i__2 = *n;
 	    for (k = 1; k <= i__2; ++k) {
 		xk = (r__1 = x[k + j * x_dim1], abs(r__1));
 		i__3 = *n;
 		for (i__ = 1; i__ <= i__3; ++i__) {
 		    work[i__] += (r__1 = a[i__ + k * a_dim1], abs(r__1)) * xk;
 /* L40: */
 		}
 /* L50: */
 	    }
 	} else {
 	    /* Transposed: entry k is the dot product of abs(A(:,k)) with abs(X(:,j)). */
 	    i__2 = *n;
 	    for (k = 1; k <= i__2; ++k) {
 		s = 0.f;
 		i__3 = *n;
 		for (i__ = 1; i__ <= i__3; ++i__) {
 		    s += (r__1 = a[i__ + k * a_dim1], abs(r__1)) * (r__2 = x[
 			    i__ + j * x_dim1], abs(r__2));
 /* L60: */
 		}
 		work[k] += s;
 /* L70: */
 	    }
 	}
 	/* s becomes the largest componentwise ratio abs(R(i)) / denominator(i). */
 	s = 0.f;
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    if (work[i__] > safe2) {
 /* Computing MAX */
 		r__2 = s, r__3 = (r__1 = work[*n + i__], abs(r__1)) / work[
 			i__];
 		s = f2cmax(r__2,r__3);
 	    } else {
 /* Computing MAX */
 		r__2 = s, r__3 = ((r__1 = work[*n + i__], abs(r__1)) + safe1) 
 			/ (work[i__] + safe1);
 		s = f2cmax(r__2,r__3);
 	    }
 /* L80: */
 	}
 	berr[j] = s;

 /*        Test stopping criterion. Continue iterating if */
 /*           1) The residual BERR(J) is larger than machine epsilon, and */
 /*           2) BERR(J) decreased by at least a factor of 2 during the */
 /*              last iteration, and */
 /*           3) At most ITMAX iterations tried. */

 	/* The literal 5 below plays the role of ITMAX. */
 	if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) {

 /*           Update solution and try again. */

 	    /* Solve op(A) * dx = R in place in work[n+1 .. 2n], then X(:,j) += dx. */
 	    sgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[*n 
 		    + 1], n, info);
 	    saxpy_(n, &c_b17, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1)
 		    ;
 	    lstres = berr[j];
 	    ++count;
 	    goto L20;
 	}

 /*        Bound error from formula */

 /*        norm(X - XTRUE) / norm(X) .le. FERR = */
 /*        norm( abs(inv(op(A)))* */
 /*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */

 /*        where */
 /*          norm(Z) is the magnitude of the largest component of Z */
 /*          inv(op(A)) is the inverse of op(A) */
 /*          abs(Z) is the componentwise absolute value of the matrix or */
 /*             vector Z */
 /*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
 /*          EPS is machine epsilon */

 /*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */
 /*        is incremented by SAFE1 if the i-th component of */
 /*        abs(op(A))*abs(X) + abs(B) is less than SAFE2. */

 /*        Use SLACN2 to estimate the infinity-norm of the matrix */
 /*           inv(op(A)) * diag(W), */
 /*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */

 	/* Overwrite work[1 .. n] with the weight vector W. */
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    if (work[i__] > safe2) {
 		work[i__] = (r__1 = work[*n + i__], abs(r__1)) + nz * eps * 
 			work[i__];
 	    } else {
 		work[i__] = (r__1 = work[*n + i__], abs(r__1)) + nz * eps * 
 			work[i__] + safe1;
 	    }
 /* L90: */
 	}

 	/* Reverse-communication loop: slacn2_ is called repeatedly until it */
 	/* hands back kase == 0; the estimate is delivered in ferr[j]. */
 	kase = 0;
 L100:
 	slacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], &
 		kase, isave);
 	if (kase != 0) {
 	    if (kase == 1) {

 /*              Multiply by diag(W)*inv(op(A)**T). */

 		sgetrs_(transt, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &
 			work[*n + 1], n, info);
 		i__2 = *n;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    work[*n + i__] = work[i__] * work[*n + i__];
 /* L110: */
 		}
 	    } else {

 /*              Multiply by inv(op(A))*diag(W). */

 		i__2 = *n;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    work[*n + i__] = work[i__] * work[*n + i__];
 /* L120: */
 		}
 		sgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &
 			work[*n + 1], n, info);
 	    }
 	    goto L100;
 	}

 /*        Normalize error. */

 	/* lstres is reused here as the largest abs(X(i,j)); a zero norm skips the division. */
 	lstres = 0.f;
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 /* Computing MAX */
 	    r__2 = lstres, r__3 = (r__1 = x[i__ + j * x_dim1], abs(r__1));
 	    lstres = f2cmax(r__2,r__3);
 /* L130: */
 	}
 	if (lstres != 0.f) {
 	    ferr[j] /= lstres;
 	}

 /* L140: */
     }

     return 0;

 /*     End of SGERFS */

 } /* sgerfs_ */

--- a/lapack-netlib/SRC/sgerfsx.c
+++ b/lapack-netlib/SRC/sgerfsx.c
--- a/lapack-netlib/SRC/sgerq2.c
+++ b/lapack-netlib/SRC/sgerq2.c
@@ -0,0 +1,587 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Fortran scalar types as seen from C. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex values are stored as {real part, imaginary part} pairs. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd: build a C99 complex value from the struct's r and i members. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd: reinterpret the struct pointer as a pointer to the C99 complex type. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf / pCd: lvalue view of a complex struct as a C99 complex, used as an assignment target. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Fortran .TRUE. / .FALSE. as logical values */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer types used by the I/O descriptor structs below. */
 /* NOTE(review): none of the numerical routines visible in this part of the */
 /* file reference these descriptors -- presumably kept only so the header */
 /* matches the standard f2c.h layout; confirm before removing. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* cilist: descriptor for external read, write */
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /* icilist: descriptor for internal read, write */
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /* olist: descriptor for open */
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /* cllist: descriptor for close */
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /* alist: descriptor for rewind, backspace, endfile */
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inlist: descriptor for inquire; most string fields are paired with a length field */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* Multitype: one storage cell that can hold any of the scalar or complex types */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Vardesc: describes one variable (name, address, dimensions, type code) */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* Namelist: a named group of nvars variable descriptors */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Elementary arithmetic helpers.  abs() is redefined here as a type-generic */
 /* macro; it and f2cmin/f2cmax evaluate their arguments more than once, so */
 /* arguments must be free of side effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit operations on integers */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Intrinsic-function wrappers.  Arguments are pointers (they are dereferenced */
 /* or passed through Cf/Cd); complex results are stored through the first argument. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 /* NOTE(review): r_cnjg and r_imag call the double-precision conj()/cimag() on a single-precision value; the result is narrowed on assignment. */
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* d_int / r_int: truncate toward zero */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* The constant is log10(e), so these compute log10 via the natural logarithm. */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* u_nint: round to nearest, halves away from zero */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* u_sign: magnitude of __a with the sign of __b (non-negative __b gives +|__a|) */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Power operators; the *_ui helpers they call are defined later in this file. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): the exponent is passed to cpow() as *(B) while the base goes through Cd(); confirm what type callers supply for B. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* s_cat: copies *np source strings rpp[i] (lengths rnp[i]) back to back into lpp, never writing more than llp bytes, then fills whatever is left of llp with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the shorter of the two lengths.  NOTE(review): no blank padding of the shorter operand -- confirm callers only compare equal-length strings. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copies at most min(C,D) bytes and stops early at a NUL in B; the rest of A is left untouched (no blank fill). */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* sig_die / s_stop: terminate the process with status 1 / 0; both ignore their arguments. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Version tag string; not referenced by any code visible in this part of the file. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shorthands: expand to a bare break / continue statement. */
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): the three macros below expand to a braced expression with no
    terminating semicolon, which is neither a valid statement nor a valid
    sub-expression -- confirm whether anything actually uses them. */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 /* Always dispatches to the double-precision dmaxloc_, regardless of the type of w. */
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning logical, with an unspecified argument list. */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Single-precision x**n for an integer exponent, by binary exponentiation
  * (square-and-multiply).  n == 0 yields 1; a negative n inverts x and uses |n|.
  * |n| is formed in unsigned arithmetic so that the most negative integer does
  * not trigger signed overflow on negation. */
 static float spow_ui(float x, integer n) {
 	float result=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* 0 - (unsigned)n equals |n| for every negative n */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* current bit set: fold this power in */
 			if(u >>= 1) x *= x;	/* more bits left: square the base */
 			else break;
 		}
 	}
 	return result;
 }
 /* Double-precision x**n for an integer exponent, by binary exponentiation.
  * n == 0 yields 1; a negative n inverts x and uses |n|.  |n| is formed in
  * unsigned arithmetic so that negating the most negative integer cannot
  * overflow. */
 static double dpow_ui(double x, integer n) {
 	double result=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* 0 - (unsigned)n equals |n| for every negative n */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* current bit set: fold this power in */
 			if(u >>= 1) x *= x;	/* more bits left: square the base */
 			else break;
 		}
 	}
 	return result;
 }
 /* Single-precision complex x**n for an integer exponent, by binary
  * exponentiation.  n == 0 yields 1; a negative n inverts x and uses |n|.
  * |n| is formed in unsigned arithmetic so that negating the most negative
  * integer cannot overflow. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* 0 - (unsigned)n equals |n| for every negative n */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* current bit set: fold this power in */
 			if(u >>= 1) x *= x;	/* more bits left: square the base */
 			else break;
 		}
 	}
 	return result;
 }
 /* Double-precision complex x**n for an integer exponent, by binary
  * exponentiation.  n == 0 yields 1; a negative n inverts x and uses |n|.
  * |n| is formed in unsigned arithmetic so that negating the most negative
  * integer cannot overflow. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* 0 - (unsigned)n equals |n| for every negative n */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* current bit set: fold this power in */
 			if(u >>= 1) x *= x;	/* more bits left: square the base */
 			else break;
 		}
 	}
 	return result;
 }
 /* Integer power x**n for integer operands.
  * For n <= 0: the result is 1 when n == 0 or x == 1; when x is neither 1 nor
  * -1 the result is 0, except that x == 0 evaluates 1/x (an integer division
  * by zero); when x == -1 the exponent is negated and the multiply loop runs.
  * For n > 0 the result is built by binary exponentiation. */
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		/* NOTE(review): the 1/x for x == 0 below looks like a deliberate trap on 0**negative -- confirm */
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;
 	}
 	/* Loop for positive n; the second clause reduces to (n != 0 && x == -1),
 	   i.e. the x == -1 branch above was taken. */
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;	/* current bit set: multiply in */
 			if(u >>= 1) x *= x;	/* more bits left: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Position of the first largest entry among w(s..e) (1-based bounds),
  * returned relative to s, so the answer is 1 when w(s) is the maximum.
  * The n argument is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer pos;
 	double top = w[s-1];
 	for (pos = s + 1; pos <= e; ++pos) {
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the first largest entry among w(s..e) (1-based bounds),
  * returned relative to s, so the answer is 1 when w(s) is the maximum.
  * The n argument is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer pos;
 	float top = w[s-1];
 	for (pos = s + 1; pos <= e; ++pos) {
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated single-precision complex dot product:
  *     *z = sum over i of conj(x_i) * y_i,   i = 0 .. *n_ - 1.
  * x and y are read with strides *incx_ and *incy_.  As in the reference BLAS,
  * a negative stride walks the vector backwards, starting at offset (1-n)*inc,
  * so no element before the start of the array is touched.  n <= 0 gives 0. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		/* first element visited for each vector */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++, ix += incx, iy += incy) { /* zdotc = zdotc + conjg(x(ix))* y(iy) */
 			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated double-precision complex dot product:
  *     *z = sum over i of conj(x_i) * y_i,   i = 0 .. *n_ - 1.
  * x and y are read with strides *incx_ and *incy_.  As in the reference BLAS,
  * a negative stride walks the vector backwards, starting at offset (1-n)*inc,
  * so no element before the start of the array is touched.  n <= 0 gives 0. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		/* first element visited for each vector */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++, ix += incx, iy += incy) { /* zdotc = zdotc + dconjg(x(ix))* y(iy) */
 			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated single-precision complex dot product:
  *     *z = sum over i of x_i * y_i,   i = 0 .. *n_ - 1.
  * x and y are read with strides *incx_ and *incy_.  As in the reference BLAS,
  * a negative stride walks the vector backwards, starting at offset (1-n)*inc,
  * so no element before the start of the array is touched.  n <= 0 gives 0. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		/* first element visited for each vector */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++, ix += incx, iy += incy) { /* zdotc = zdotc + x(ix)* y(iy) */
 			zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Unconjugated double-precision complex dot product:
  *     *z = sum over i of x_i * y_i,   i = 0 .. *n_ - 1.
  * x and y are read with strides *incx_ and *incy_.  As in the reference BLAS,
  * a negative stride walks the vector backwards, starting at offset (1-n)*inc,
  * so no element before the start of the array is touched.  n <= 0 gives 0. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		/* first element visited for each vector */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++, ix += incx, iy += incy) { /* zdotc = zdotc + x(ix)* y(iy) */
 			zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SGERQ2 computes the RQ factorization of a general rectangular matrix using an unblocked algorithm. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGERQ2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgerq2.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgerq2.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgerq2.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGERQ2( M, N, A, LDA, TAU, WORK, INFO ) */

 /*       INTEGER            INFO, LDA, M, N */
 /*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGERQ2 computes an RQ factorization of a real m by n matrix A: */
 /* > A = R * Q. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the m by n matrix A. */
 /* >          On exit, if m <= n, the upper triangle of the subarray */
 /* >          A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; */
 /* >          if m >= n, the elements on and above the (m-n)-th subdiagonal */
 /* >          contain the m by n upper trapezoidal matrix R; the remaining */
 /* >          elements, with the array TAU, represent the orthogonal matrix */
 /* >          Q as a product of elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is REAL array, dimension (min(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (M) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = min(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */
 /* >  A(m-k+i,1:n-k+i-1), and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int sgerq2_(integer *m, integer *n, real *a, integer *lda, 
 	real *tau, real *work, integer *info)
 {
     /* External routines (Fortran calling convention) */
     extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, 
 	    integer *, real *, real *, integer *, real *), xerbla_(
 	    char *, integer *, ftnlen), slarfg_(integer *, real *, real *, 
 	    integer *, real *);

     /* Locals.  A is addressed 0-based here: element (r,c) in 1-based */
     /* Fortran terms lives at a[(r-1) + (c-1)*ld]. */
     const integer ld = *lda;
     integer nreflect, step, row, col, argno, nrows, ncols;
     real *pivot, *rowhead;
     real saved;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Validate the arguments; INFO is minus the position of the first bad one */

     *info = 0;
     if (*m < 0) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*lda < (1 >= *m ? 1 : *m)) {
 	*info = -4;
     }
     if (*info != 0) {
 	argno = -(*info);
 	xerbla_("SGERQ2", &argno, (ftnlen)6);
 	return 0;
     }

 /*     Number of elementary reflectors: k = min(m,n) */

     nreflect = (*m <= *n) ? *m : *n;

 /*     Work from the last reflector back to the first */

     for (step = nreflect; step >= 1; --step) {

 	row = *m - nreflect + step;
 	col = *n - nreflect + step;
 	pivot = a + (row - 1) + (col - 1) * ld;
 	rowhead = a + (row - 1);

 /*        Generate elementary reflector H(i) to annihilate */
 /*        A(m-k+i,1:n-k+i-1) */

 	ncols = col;
 	slarfg_(&ncols, pivot, rowhead, lda, &tau[step - 1]);

 /*        Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right; the pivot */
 /*        entry is temporarily set to 1 and restored afterwards */

 	saved = *pivot;
 	*pivot = 1.f;
 	nrows = row - 1;
 	ncols = col;
 	slarf_("Right", &nrows, &ncols, rowhead, lda, &tau[step - 1], a, 
 		lda, work);
 	*pivot = saved;
     }
     return 0;

 /*     End of SGERQ2 */

 } /* sgerq2_ */

--- a/lapack-netlib/SRC/sgerqf.c
+++ b/lapack-netlib/SRC/sgerqf.c
@@ -0,0 +1,711 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar Fortran types as this f2c header maps them onto C types. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex values held as a struct of real part r and imaginary part i. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd: build a C99 _Complex value from the r and i fields. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd: cast the struct pointer to a pointer to the matching _Complex
    type; pCf / pCd dereference it, giving an lvalue usable for assignment.
    NOTE(review): this relies on the struct and the _Complex type sharing the
    same layout -- confirm for the target compilers. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Logical type and narrower logical/integer variants. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Integer constants 1 and 0 used as the true and false values. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer types and parameter records for the I/O part of f2c.h.  The
    routines visible in this part of the file do not reference them. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Most members come in pairs: a result pointer plus an ftnlen/ftnint length. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* One storage cell able to hold any of the scalar or complex types above. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of one variable: its name, address, dimension data and a type code. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /*
  * Macro replacements for f2c runtime helpers.  Function-like macros that
  * take pointers (d_abs, r_sign, pow_ri, ...) dereference their arguments,
  * matching the by-reference calling convention of the translated code.
  * Arguments may be evaluated more than once, so pass side-effect-free
  * expressions only.
  */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 /* NOTE(review): c_cos evaluates in double precision (ccos) while its
    siblings use the float variants; left unchanged to keep results identical. */
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 /* Single-precision conjugate: conjf avoids a round trip through double. */
 #define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* Both operands are doublecomplex pointers, so both go through Cd();
    passing *(B) would hand a struct to cpow and fail to compile. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 /* s_cat: concatenate *(np) strings rpp[i] (lengths rnp[i]) into lpp,
    truncating at llp characters and padding the remainder with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 /* The three macros below yield values, so they expand to parenthesized
    expressions; a braced "{expr}" form cannot be used anywhere. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning logical.  The C++ branch declares
    it with a variadic parameter list, the C branch with an unspecified one. */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* x**n for a float base and integer exponent, by repeated squaring.
    n == 0 gives 1; a negative n gives (1/x)**|n|. */
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* |n| is formed in unsigned arithmetic: "n = -n" overflows
 			   (undefined behavior) for the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* low exponent bit set */
 			if(u >>= 1) x *= x;	/* bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* x**n for a double base and integer exponent, by repeated squaring.
    n == 0 gives 1; a negative n gives (1/x)**|n|. */
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* |n| is formed in unsigned arithmetic: "n = -n" overflows
 			   (undefined behavior) for the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* low exponent bit set */
 			if(u >>= 1) x *= x;	/* bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* x**n for a single-precision complex base and integer exponent, by
    repeated squaring.  n == 0 gives 1; a negative n gives (1/x)**|n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* |n| is formed in unsigned arithmetic: "n = -n" overflows
 			   (undefined behavior) for the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* low exponent bit set */
 			if(u >>= 1) x *= x;	/* bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* x**n for a double-precision complex base and integer exponent, by
    repeated squaring.  n == 0 gives 1; a negative n gives (1/x)**|n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* |n| is formed in unsigned arithmetic: "n = -n" overflows
 			   (undefined behavior) for the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* low exponent bit set */
 			if(u >>= 1) x *= x;	/* bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Integer power x**n with integer (truncating) results.
    n == 0 or x == 1 gives 1; for n < 0 the result is 0 unless x is -1
    (sign decided by the parity of n) or x is 0.
    NOTE(review): 0 raised to a negative power still evaluates 1/x with
    x == 0, as the original did -- presumably to force a divide fault. */
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* x == -1: only |n| matters.  Taken in unsigned arithmetic,
 		   because "n = -n" overflows for the most negative integer. */
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for(pow = 1; ; ) {
 		if(u & 01) pow *= x;	/* low exponent bit set */
 		if(u >>= 1) x *= x;	/* bits remain: square the base */
 		else break;
 	}
 	return pow;
 }
 /* Position, counted from 1 at index s, of the first largest entry among
    the 1-based entries w(s..e).  The n argument is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer pos = s + 1;
 	double peak = w[s-1];
 	while (pos <= e) {
 		if (w[pos-1] > peak) {
 			best = pos;
 			peak = w[pos-1];
 		}
 		++pos;
 	}
 	return best - s + 1;
 }
 /* Position, counted from 1 at index s, of the first largest entry among
    the 1-based entries w(s..e).  The n argument is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer pos = s + 1;
 	float peak = w[s-1];
 	while (pos <= e) {
 		if (w[pos-1] > peak) {
 			best = pos;
 			peak = w[pos-1];
 		}
 		++pos;
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum over i of conj(x(i)) * y(i), for n
    elements taken with strides incx and incy.  n <= 0 stores zero. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* BLAS convention: with a negative increment the vector is walked
 	   backwards, starting from offset (1-n)*inc, so that every access
 	   stays at or after the pointer that was passed in. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex float zdotc = 0.0;
 	for (i=0;i<n;i++) {
 		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product: *z = sum over i of conj(x(i)) * y(i), for n
    elements taken with strides incx and incy.  n <= 0 stores zero. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* BLAS convention: with a negative increment the vector is walked
 	   backwards, starting from offset (1-n)*inc, so that every access
 	   stays at or after the pointer that was passed in. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex double zdotc = 0.0;
 	for (i=0;i<n;i++) {
 		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated dot product: *z = sum over i of x(i) * y(i), for n
    elements taken with strides incx and incy.  n <= 0 stores zero. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* BLAS convention: with a negative increment the vector is walked
 	   backwards, starting from offset (1-n)*inc, so that every access
 	   stays at or after the pointer that was passed in. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex float zdotc = 0.0;
 	for (i=0;i<n;i++) {
 		zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = zdotc;
 }
 /* Unconjugated dot product: *z = sum over i of x(i) * y(i), for n
    elements taken with strides incx and incy.  n <= 0 stores zero. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* BLAS convention: with a negative increment the vector is walked
 	   backwards, starting from offset (1-n)*inc, so that every access
 	   stays at or after the pointer that was passed in. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex double zdotc = 0.0;
 	for (i=0;i<n;i++) {
 		zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static integer c__3 = 3;
 static integer c__2 = 2;

 /* > \brief \b SGERQF */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGERQF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgerqf.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgerqf.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgerqf.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */

 /*       INTEGER            INFO, LDA, LWORK, M, N */
 /*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGERQF computes an RQ factorization of a real M-by-N matrix A: */
 /* > A = R * Q. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, */
 /* >          if m <= n, the upper triangle of the subarray */
 /* >          A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; */
 /* >          if m >= n, the elements on and above the (m-n)-th subdiagonal */
 /* >          contain the M-by-N upper trapezoidal matrix R; */
 /* >          the remaining elements, with the array TAU, represent the */
 /* >          orthogonal matrix Q as a product of min(m,n) elementary */
 /* >          reflectors (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAU */
 /* > \verbatim */
 /* >          TAU is REAL array, dimension (min(M,N)) */
 /* >          The scalar factors of the elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK.  LWORK >= max(1,M). */
 /* >          For optimum performance LWORK >= M*NB, where NB is */
 /* >          the optimal blocksize. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = min(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - tau * v * v**T */
 /* > */
 /* >  where tau is a real scalar, and v is a real vector with */
 /* >  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */
 /* >  A(m-k+i,1:n-k+i-1), and tau in TAU(i). */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /*
  * SGERQF: blocked RQ factorization A = R * Q of the m-by-n matrix a
  * (column-major, leading dimension *lda).
  *
  *   m, n   matrix dimensions, each >= 0 (errors 1 / 2).
  *   a      matrix, overwritten in place.
  *   lda    leading dimension, >= max(1, m) (error 4).
  *   tau    receives the reflector scalars.
  *   work   workspace of *lwork entries; work(1) is set to a workspace size
  *          on return (m*nb after the argument checks, iws at the end).
  *   lwork  workspace length, >= max(1, m) (error 7).  The value -1 requests
  *          a workspace query: only work(1) is computed.
  *   info   0 on success, -i when argument i is illegal (xerbla_ is called).
  *
  * Trailing rows are factored in blocks of nb rows with sgerq2_, slarft_ and
  * slarfb_; the remaining leading block is factored with sgerq2_ alone.
  * Always returns 0.
  */
 /* Subroutine */ int sgerqf_(integer *m, integer *n, real *a, integer *lda, 
 	real *tau, real *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;

    /* Local variables */
    integer i__, k, nbmin, iinfo;
    extern /* Subroutine */ int sgerq2_(integer *, integer *, real *, integer 
 	    *, real *, real *, integer *);
    integer ib, nb, ki, kk, mu, nu, nx;
    extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
 	    integer *, integer *, integer *, real *, integer *, real *, 
 	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, 
 	    real *, integer *, real *, real *, integer *);
    integer ldwork, lwkopt;
    logical lquery;
    integer iws;

    /* Parameter adjustments: shift the array bases so the Fortran 1-based
       subscripts can be used unchanged below. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;

    /* Defensive defaults; every path that reads these assigns them first. */
    k = 0;
    nb = 1;
    ldwork = 1;

    /* Test the input arguments */
    *info = 0;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    } else if (*lwork < f2cmax(1,*m) && ! lquery) {
 	*info = -7;
    }

    /* With valid arguments, publish the optimal workspace size.  The LWORK
       test above is the only one needed: once *info is 0 here, the same
       condition cannot hold again. */
    if (*info == 0) {
 	k = f2cmin(*m,*n);
 	if (k == 0) {
 	    lwkopt = 1;
 	} else {
 	    nb = ilaenv_(&c__1, "SGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
 		    (ftnlen)1);
 	    lwkopt = *m * nb;
 	}
 	work[1] = (real) lwkopt;
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("SGERQF", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

    /* Quick return if possible */
    if (k == 0) {
 	return 0;
    }

    nbmin = 2;
    nx = 1;
    iws = *m;
    if (nb > 1 && nb < k) {

 	/* Determine when to cross over from blocked to unblocked code. */
 	i__1 = 0, i__2 = ilaenv_(&c__3, "SGERQF", " ", m, n, &c_n1, &c_n1, (
 		ftnlen)6, (ftnlen)1);
 	nx = f2cmax(i__1,i__2);
 	if (nx < k) {

 	    /* Determine if workspace is large enough for blocked code. */
 	    ldwork = *m;
 	    iws = ldwork * nb;
 	    if (*lwork < iws) {

 		/* Not enough workspace to use optimal NB: reduce NB and
 		   determine the minimum value of NB. */
 		nb = *lwork / ldwork;
 		i__1 = 2, i__2 = ilaenv_(&c__2, "SGERQF", " ", m, n, &c_n1, &
 			c_n1, (ftnlen)6, (ftnlen)1);
 		nbmin = f2cmax(i__1,i__2);
 	    }
 	}
    }

    if (nb >= nbmin && nb < k && nx < k) {

 	/* Use blocked code initially.
 	   The last kk rows are handled by the block method. */
 	ki = (k - nx - 1) / nb * nb;
 	i__1 = k, i__2 = ki + nb;
 	kk = f2cmin(i__1,i__2);

 	/* i__ steps downwards by nb from k-kk+ki+1 to k-kk+1. */
 	i__1 = k - kk + 1;
 	i__2 = -nb;
 	for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ 
 		+= i__2) {
 	    i__3 = k - i__ + 1;
 	    ib = f2cmin(i__3,nb);

 	    /* Compute the RQ factorization of the current block
 	       A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) */
 	    i__3 = *n - k + i__ + ib - 1;
 	    sgerq2_(&ib, &i__3, &a[*m - k + i__ + a_dim1], lda, &tau[i__], &
 		    work[1], &iinfo);
 	    if (*m - k + i__ > 1) {

 		/* Form the triangular factor of the block reflector
 		   H = H(i+ib-1) . . . H(i+1) H(i) */
 		i__3 = *n - k + i__ + ib - 1;
 		slarft_("Backward", "Rowwise", &i__3, &ib, &a[*m - k + i__ + 
 			a_dim1], lda, &tau[i__], &work[1], &ldwork);

 		/* Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right */
 		i__3 = *m - k + i__ - 1;
 		i__4 = *n - k + i__ + ib - 1;
 		slarfb_("Right", "No transpose", "Backward", "Rowwise", &i__3,
 			 &i__4, &ib, &a[*m - k + i__ + a_dim1], lda, &work[1],
 			 &ldwork, &a[a_offset], lda, &work[ib + 1], &ldwork);
 	    }
 	}
 	/* i__ holds the value it had when the loop test failed; the leading
 	   mu-by-nu block is what remains to be factored. */
 	mu = *m - k + i__ + nb - 1;
 	nu = *n - k + i__ + nb - 1;
    } else {
 	mu = *m;
 	nu = *n;
    }

    /* Use unblocked code to factor the last or only block */
    if (mu > 0 && nu > 0) {
 	sgerq2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
    }

    work[1] = (real) iws;
    return 0;

 /*     End of SGERQF */

 } /* sgerqf_ */

--- a/lapack-netlib/SRC/sgesc2.c
+++ b/lapack-netlib/SRC/sgesc2.c
@@ -0,0 +1,604 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar Fortran types as this f2c header maps them onto C types. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex values held as a struct of real part r and imaginary part i. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd: build a C99 _Complex value from the r and i fields. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd: cast the struct pointer to a pointer to the matching _Complex
    type; pCf / pCd dereference it, giving an lvalue usable for assignment.
    NOTE(review): this relies on the struct and the _Complex type sharing the
    same layout -- confirm for the target compilers. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Logical type and narrower logical/integer variants. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Integer constants 1 and 0 used as the true and false values. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer types and parameter records for the I/O part of f2c.h.  The
    routines visible in this part of the file do not reference them. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Most members come in pairs: a result pointer plus an ftnlen/ftnint length. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* One storage cell able to hold any of the scalar or complex types above. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of one variable: its name, address, dimension data and a type code. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /*
  * Macro replacements for f2c runtime helpers.  Function-like macros that
  * take pointers (d_abs, r_sign, pow_ri, ...) dereference their arguments,
  * matching the by-reference calling convention of the translated code.
  * Arguments may be evaluated more than once, so pass side-effect-free
  * expressions only.
  */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 /* NOTE(review): c_cos evaluates in double precision (ccos) while its
    siblings use the float variants; left unchanged to keep results identical. */
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 /* Single-precision conjugate: conjf avoids a round trip through double. */
 #define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* Both operands are doublecomplex pointers, so both go through Cd();
    passing *(B) would hand a struct to cpow and fail to compile. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 /* s_cat: concatenate *(np) strings rpp[i] (lengths rnp[i]) into lpp,
    truncating at llp characters and padding the remainder with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 /* The three macros below yield values, so they expand to parenthesized
    expressions; a braced "{expr}" form cannot be used anywhere. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning logical.  The C++ branch declares
    it with a variadic parameter list, the C branch with an unspecified one. */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* x**n for a float base and integer exponent, by repeated squaring.
    n == 0 gives 1; a negative n gives (1/x)**|n|. */
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* |n| is formed in unsigned arithmetic: "n = -n" overflows
 			   (undefined behavior) for the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* low exponent bit set */
 			if(u >>= 1) x *= x;	/* bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* x**n for a double base and integer exponent, by repeated squaring.
    n == 0 gives 1; a negative n gives (1/x)**|n|. */
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* |n| is formed in unsigned arithmetic: "n = -n" overflows
 			   (undefined behavior) for the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* low exponent bit set */
 			if(u >>= 1) x *= x;	/* bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* x**n for a single-precision complex base and integer exponent, by
    repeated squaring.  n == 0 gives 1; a negative n gives (1/x)**|n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* |n| is formed in unsigned arithmetic: "n = -n" overflows
 			   (undefined behavior) for the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* low exponent bit set */
 			if(u >>= 1) x *= x;	/* bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* x**n for a double-precision complex base and integer exponent, by
    repeated squaring.  n == 0 gives 1; a negative n gives (1/x)**|n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* |n| is formed in unsigned arithmetic: "n = -n" overflows
 			   (undefined behavior) for the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* low exponent bit set */
 			if(u >>= 1) x *= x;	/* bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Integer power x**n with integer (truncating) results.
    n == 0 or x == 1 gives 1; for n < 0 the result is 0 unless x is -1
    (sign decided by the parity of n) or x is 0.
    NOTE(review): 0 raised to a negative power still evaluates 1/x with
    x == 0, as the original did -- presumably to force a divide fault. */
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* x == -1: only |n| matters.  Taken in unsigned arithmetic,
 		   because "n = -n" overflows for the most negative integer. */
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for(pow = 1; ; ) {
 		if(u & 01) pow *= x;	/* low exponent bit set */
 		if(u >>= 1) x *= x;	/* bits remain: square the base */
 		else break;
 	}
 	return pow;
 }
 /* Position, counted from 1 at index s, of the first largest entry among
    the 1-based entries w(s..e).  The n argument is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer pos = s + 1;
 	double peak = w[s-1];
 	while (pos <= e) {
 		if (w[pos-1] > peak) {
 			best = pos;
 			peak = w[pos-1];
 		}
 		++pos;
 	}
 	return best - s + 1;
 }
 /* Position, counted from 1 at index s, of the first largest entry among
    the 1-based entries w(s..e).  The n argument is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer pos = s + 1;
 	float peak = w[s-1];
 	while (pos <= e) {
 		if (w[pos-1] > peak) {
 			best = pos;
 			peak = w[pos-1];
 		}
 		++pos;
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum over i of conj(x(i)) * y(i), for n
    elements taken with strides incx and incy.  n <= 0 stores zero. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* BLAS convention: with a negative increment the vector is walked
 	   backwards, starting from offset (1-n)*inc, so that every access
 	   stays at or after the pointer that was passed in. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex float zdotc = 0.0;
 	for (i=0;i<n;i++) {
 		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product: *z = sum over i of conj(x(i)) * y(i), for n
    elements taken with strides incx and incy.  n <= 0 stores zero. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* BLAS convention: with a negative increment the vector is walked
 	   backwards, starting from offset (1-n)*inc, so that every access
 	   stays at or after the pointer that was passed in. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex double zdotc = 0.0;
 	for (i=0;i<n;i++) {
 		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated dot product: *z = sum over i of x(i) * y(i), for n
    elements taken with strides incx and incy.  n <= 0 stores zero. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* BLAS convention: with a negative increment the vector is walked
 	   backwards, starting from offset (1-n)*inc, so that every access
 	   stays at or after the pointer that was passed in. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex float zdotc = 0.0;
 	for (i=0;i<n;i++) {
 		zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = zdotc;
 }
 /* Unconjugated dot product: *z = sum over i of x(i) * y(i), for n
    elements taken with strides incx and incy.  n <= 0 stores zero. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* BLAS convention: with a negative increment the vector is walked
 	   backwards, starting from offset (1-n)*inc, so that every access
 	   stays at or after the pointer that was passed in. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex double zdotc = 0.0;
 	for (i=0;i<n;i++) {
 		zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b SGESC2 solves a system of linear equations using the LU factorization with complete pivoting computed by sgetc2. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGESC2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesc2.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesc2.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesc2.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) */

 /*       INTEGER            LDA, N */
 /*       REAL               SCALE */
 /*       INTEGER            IPIV( * ), JPIV( * ) */
 /*       REAL               A( LDA, * ), RHS( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGESC2 solves a system of linear equations */
 /* > */
 /* >           A * X = scale* RHS */
 /* > */
 /* > with a general N-by-N matrix A using the LU factorization with */
 /* > complete pivoting computed by SGETC2. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the  LU part of the factorization of the n-by-n */
 /* >          matrix A computed by SGETC2:  A = P * L * U * Q */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1, N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] RHS */
 /* > \verbatim */
 /* >          RHS is REAL array, dimension (N). */
 /* >          On entry, the right hand side vector b. */
 /* >          On exit, the solution vector X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N). */
 /* >          The pivot indices; for 1 <= i <= N, row i of the */
 /* >          matrix has been interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JPIV */
 /* > \verbatim */
 /* >          JPIV is INTEGER array, dimension (N). */
 /* >          The pivot indices; for 1 <= j <= N, column j of the */
 /* >          matrix has been interchanged with column JPIV(j). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] SCALE */
 /* > \verbatim */
 /* >          SCALE is REAL */
 /* >           On exit, SCALE contains the scale factor. SCALE is chosen */
 /* >           0 <= SCALE <= 1 to prevent overflow in the solution. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEauxiliary */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Bo Kagstrom and Peter Poromaa, Department of Computing Science, */
 /* >     Umea University, S-901 87 Umea, Sweden. */

 /*  ===================================================================== */
 /* Subroutine */ int sgesc2_(integer *n, real *a, integer *lda, real *rhs, 
 	integer *ipiv, integer *jpiv, real *scale)
 {
     /* External BLAS/LAPACK routines used below */
     extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
 	    slabad_(real *, real *);
     extern real slamch_(char *);
     extern integer isamax_(integer *, real *, integer *);
     extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer 
 	    *, integer *, integer *, integer *);

     /* Locals */
     integer ld, row, col, imax, nswap;
     real eps, smlnum, bignum, factor, rmax, pivot;

 /*  -- LAPACK auxiliary routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */

     /* Rebase the arrays so the 1-based Fortran subscripts apply directly: */
     /* a[i + j * ld] addresses A(i,j), rhs[i] addresses RHS(i).            */
     ld = *lda;
     a -= 1 + ld;
     --rhs;
     --ipiv;
     --jpiv;

     /* Set constants to control overflow */
     eps = slamch_("P");
     smlnum = slamch_("S") / eps;
     bignum = 1.f / smlnum;
     slabad_(&smlnum, &bignum);

     /* Apply permutations IPIV to RHS */
     nswap = *n - 1;
     slaswp_(&c__1, &rhs[1], lda, &c__1, &nswap, &ipiv[1], &c__1);

     /* Solve for L part: eliminate column by column below the diagonal */
     for (col = 1; col <= *n - 1; ++col) {
 	for (row = col + 1; row <= *n; ++row) {
 	    rhs[row] -= a[row + col * ld] * rhs[col];
 	}
     }

     /* Solve for U part */
     *scale = 1.f;

     /* Check for scaling: compare the RHS entry picked by isamax_ with */
     /* the last diagonal element A(N,N).                                */
     imax = isamax_(n, &rhs[1], &c__1);
     rmax = rhs[imax];
     pivot = a[*n + *n * ld];
     if (smlnum * 2.f * abs(rmax) > abs(pivot)) {
 	factor = .5f / abs(rmax);
 	sscal_(n, &factor, &rhs[1], &c__1);
 	*scale *= factor;
     }

     /* Back substitution, from the last row up to the first */
     for (row = *n; row >= 1; --row) {
 	factor = 1.f / a[row + row * ld];
 	rhs[row] *= factor;
 	for (col = row + 1; col <= *n; ++col) {
 	    rhs[row] -= rhs[col] * (a[row + col * ld] * factor);
 	}
     }

     /* Apply permutations JPIV to the solution (RHS), in reverse order */
     nswap = *n - 1;
     slaswp_(&c__1, &rhs[1], lda, &c__1, &nswap, &jpiv[1], &c_n1);
     return 0;

 /*     End of SGESC2 */

 } /* sgesc2_ */

--- a/lapack-netlib/SRC/sgesdd.c
+++ b/lapack-netlib/SRC/sgesdd.c
--- a/lapack-netlib/SRC/sgesv.c
+++ b/lapack-netlib/SRC/sgesv.c
@@ -0,0 +1,576 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* pow_zz: *R = (*A) ** (*B) for doublecomplex pointers R, A and B.  Both operands are converted with Cd() because cpow() takes _Complex double values, not doublecomplex structs. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Raise a float to an integer power by repeated squaring.
  * A zero exponent yields 1; a negative exponent uses the reciprocal of x. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	for (;;) {
 		/* multiply in the current power of x when its exponent bit is set */
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;
 	}
 	return result;
 }
 /* Raise a double to an integer power by repeated squaring.
  * A zero exponent yields 1; a negative exponent uses the reciprocal of x. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	for (;;) {
 		/* multiply in the current power of x when its exponent bit is set */
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;
 	}
 	return result;
 }
 /* Raise a single-precision complex value to an integer power by repeated
  * squaring.  A zero exponent yields 1; a negative exponent uses 1/x. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	for (;;) {
 		/* multiply in the current power of x when its exponent bit is set */
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;
 	}
 	return result;
 }
 /* Raise a double-precision complex value to an integer power by repeated
  * squaring.  A zero exponent yields 1; a negative exponent uses 1/x. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	for (;;) {
 		/* multiply in the current power of x when its exponent bit is set */
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;
 	}
 	return result;
 }
 /* Integer power x**n for integer operands.
  *   n == 0 or x == 1          -> 1
  *   n < 0 and x == -1         -> evaluated as (-1)**(-n) by the loop below
  *   n < 0 and x == 0          -> evaluates 1/x, i.e. an integer division by zero
  *   n < 0 and any other x     -> 0
  *   n > 0                     -> repeated squaring over the bits of n
  * NOTE(review): the 1/x for x == 0 looks like a deliberate way to fault on
  * 0 raised to a negative power -- confirm before changing it. */
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;	/* x == -1: continue with the positive exponent */
 	}
 	/* The second operand is true only for n != 0 with x == -1, i.e. the
 	   case that was redirected to the loop above. */
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;	/* exponent bit set: multiply in */
 			if(u >>= 1) x *= x;	/* more bits left: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Position of the largest element among w[s-1] .. w[e-1] (1-based bounds s..e).
  * Returns the position relative to s, starting at 1; ties keep the earliest
  * element because the comparison is strict.  The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s, k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the largest element among w[s-1] .. w[e-1] (1-based bounds s..e).
  * Returns the position relative to s, starting at 1; ties keep the earliest
  * element because the comparison is strict.  The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s, k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated single-precision complex dot product.
  * Stores sum over k in [0, *n_) of conj(x[k * *incx_]) * y[k * *incy_] into *z.
  * Elements are read at offsets k*incx and k*incy from the given pointers. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer len = *n_, xstep = *incx_, ystep = *incy_;
 	_Complex float acc = 0.0;
 	integer k;
 	/* With unit strides k*xstep == k, so one loop covers both cases. */
 	for (k = 0; k < len; k++) {
 		/* acc = acc + conjg(x(k)) * y(k) */
 		acc += conjf(Cf(&x[k*xstep])) * Cf(&y[k*ystep]);
 	}
 	pCf(z) = acc;
 }
 /* Conjugated double-precision complex dot product.
  * Stores sum over k in [0, *n_) of conj(x[k * *incx_]) * y[k * *incy_] into *z.
  * Elements are read at offsets k*incx and k*incy from the given pointers. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer len = *n_, xstep = *incx_, ystep = *incy_;
 	_Complex double acc = 0.0;
 	integer k;
 	/* With unit strides k*xstep == k, so one loop covers both cases. */
 	for (k = 0; k < len; k++) {
 		/* acc = acc + dconjg(x(k)) * y(k) */
 		acc += conj(Cd(&x[k*xstep])) * Cd(&y[k*ystep]);
 	}
 	pCd(z) = acc;
 }
 /* Unconjugated single-precision complex dot product.
  * Stores sum over k in [0, *n_) of x[k * *incx_] * y[k * *incy_] into *z.
  * Elements are read at offsets k*incx and k*incy from the given pointers. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer len = *n_, xstep = *incx_, ystep = *incy_;
 	_Complex float acc = 0.0;
 	integer k;
 	/* With unit strides k*xstep == k, so one loop covers both cases. */
 	for (k = 0; k < len; k++) {
 		/* acc = acc + x(k) * y(k)  (no conjugation) */
 		acc += Cf(&x[k*xstep]) * Cf(&y[k*ystep]);
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated double-precision complex dot product.
  * Stores sum over k in [0, *n_) of x[k * *incx_] * y[k * *incy_] into *z.
  * Elements are read at offsets k*incx and k*incy from the given pointers. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer len = *n_, xstep = *incx_, ystep = *incy_;
 	_Complex double acc = 0.0;
 	integer k;
 	/* With unit strides k*xstep == k, so one loop covers both cases. */
 	for (k = 0; k < len; k++) {
 		/* acc = acc + x(k) * y(k)  (no conjugation) */
 		acc += Cd(&x[k*xstep]) * Cd(&y[k*ystep]);
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief <b> SGESV computes the solution to a system of linear equations A * X = B for GE matrices</b>
    (simple driver) */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGESV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesv.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesv.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesv.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */

 /*       INTEGER            INFO, LDA, LDB, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       REAL               A( LDA, * ), B( LDB, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGESV computes the solution to a real system of linear equations */
 /* >    A * X = B, */
 /* > where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */
 /* > */
 /* > The LU decomposition with partial pivoting and row interchanges is */
 /* > used to factor A as */
 /* >    A = P * L * U, */
 /* > where P is a permutation matrix, L is unit lower triangular, and U is */
 /* > upper triangular.  The factored form of A is then used to solve the */
 /* > system of equations A * X = B. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of linear equations, i.e., the order of the */
 /* >          matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the N-by-N coefficient matrix A. */
 /* >          On exit, the factors L and U from the factorization */
 /* >          A = P*L*U; the unit diagonal elements of L are not stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices that define the permutation matrix P; */
 /* >          row i of the matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,NRHS) */
 /* >          On entry, the N-by-NRHS right hand side matrix B. */
 /* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization */
 /* >                has been completed, but the factor U is exactly */
 /* >                singular, so the solution could not be computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEsolve */

 /*  ===================================================================== */
 /* Subroutine */ int sgesv_(integer *n, integer *nrhs, real *a, integer *lda, 
 	integer *ipiv, real *b, integer *ldb, integer *info)
 {
     /* LAPACK routines called by this driver */
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), sgetrf_(
 	    integer *, integer *, real *, integer *, integer *, integer *), 
 	    sgetrs_(char *, integer *, integer *, real *, integer *, integer *
 	    , real *, integer *, integer *);

     /* Position of the offending argument, as reported to xerbla_ */
     integer bad_arg;

 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */

     /* Test the input parameters; INFO = -k flags the k-th argument. */
     /* The arrays are forwarded untouched, so no index rebasing is needed. */
     *info = 0;
     if (*n < 0) {
 	*info = -1;
     } else if (*nrhs < 0) {
 	*info = -2;
     } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
     } else if (*ldb < f2cmax(1,*n)) {
 	*info = -7;
     }
     if (*info != 0) {
 	bad_arg = -(*info);
 	xerbla_("SGESV ", &bad_arg, (ftnlen)5);
 	return 0;
     }

     /* Compute the LU factorization of A. */
     sgetrf_(n, n, a, lda, ipiv, info);
     if (*info != 0) {
 	/* Factorization reported a problem: leave B as it is. */
 	return 0;
     }

     /* Solve the system A*X = B, overwriting B with X. */
     sgetrs_("No transpose", n, nrhs, a, lda, ipiv, b, ldb, info);
     return 0;

 /*     End of SGESV */

 } /* sgesv_ */

--- a/lapack-netlib/SRC/sgesvd.c
+++ b/lapack-netlib/SRC/sgesvd.c
--- a/lapack-netlib/SRC/sgesvdq.c
+++ b/lapack-netlib/SRC/sgesvdq.c
--- a/lapack-netlib/SRC/sgesvdx.c
+++ b/lapack-netlib/SRC/sgesvdx.c
--- a/lapack-netlib/SRC/sgesvj.c
+++ b/lapack-netlib/SRC/sgesvj.c
--- a/lapack-netlib/SRC/sgesvx.c
+++ b/lapack-netlib/SRC/sgesvx.c
--- a/lapack-netlib/SRC/sgesvxx.c
+++ b/lapack-netlib/SRC/sgesvxx.c
--- a/lapack-netlib/SRC/sgetc2.c
+++ b/lapack-netlib/SRC/sgetc2.c
@@ -0,0 +1,645 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* pow_zz: *R = (*A) ** (*B) for doublecomplex pointers R, A and B.  Both operands are converted with Cd() because cpow() takes _Complex double values, not doublecomplex structs. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Float raised to an integer power via square-and-multiply.
  * n == 0 gives 1; for n < 0 the base is replaced by its reciprocal. */
 static float spow_ui(float x, integer n) {
 	float acc = 1.0;
 	unsigned long int e;
 	if (n != 0) {
 		if (n < 0) {
 			n = -n;
 			x = 1/x;
 		}
 		e = n;
 		while (1) {
 			if (e & 01) acc *= x;	/* low bit set: fold x into the result */
 			e >>= 1;
 			if (!e) break;
 			x *= x;
 		}
 	}
 	return acc;
 }
 /* Double raised to an integer power via square-and-multiply.
  * n == 0 gives 1; for n < 0 the base is replaced by its reciprocal. */
 static double dpow_ui(double x, integer n) {
 	double acc = 1.0;
 	unsigned long int e;
 	if (n != 0) {
 		if (n < 0) {
 			n = -n;
 			x = 1/x;
 		}
 		e = n;
 		while (1) {
 			if (e & 01) acc *= x;	/* low bit set: fold x into the result */
 			e >>= 1;
 			if (!e) break;
 			x *= x;
 		}
 	}
 	return acc;
 }
 /* Single-precision complex value raised to an integer power via
  * square-and-multiply.  n == 0 gives 1; for n < 0 the base becomes 1/x. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float acc = 1.0;
 	unsigned long int e;
 	if (n != 0) {
 		if (n < 0) {
 			n = -n;
 			x = 1/x;
 		}
 		e = n;
 		while (1) {
 			if (e & 01) acc *= x;	/* low bit set: fold x into the result */
 			e >>= 1;
 			if (!e) break;
 			x *= x;
 		}
 	}
 	return acc;
 }
 /* Double-precision complex value raised to an integer power via
  * square-and-multiply.  n == 0 gives 1; for n < 0 the base becomes 1/x. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double acc = 1.0;
 	unsigned long int e;
 	if (n != 0) {
 		if (n < 0) {
 			n = -n;
 			x = 1/x;
 		}
 		e = n;
 		while (1) {
 			if (e & 01) acc *= x;	/* low bit set: fold x into the result */
 			e >>= 1;
 			if (!e) break;
 			x *= x;
 		}
 	}
 	return acc;
 }
 /* Integer power x**n for integer operands.
  *   n == 0 or x == 1          -> 1
  *   n < 0 and x == -1         -> evaluated as (-1)**(-n) by the loop below
  *   n < 0 and x == 0          -> evaluates 1/x, i.e. an integer division by zero
  *   n < 0 and any other x     -> 0
  *   n > 0                     -> repeated squaring over the bits of n
  * NOTE(review): the 1/x for x == 0 looks like a deliberate way to fault on
  * 0 raised to a negative power -- confirm before changing it. */
 static integer pow_ii(integer x, integer n) {
 	integer pow; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) pow = 1;
 		else if (x != -1) pow = x == 0 ? 1/x : 0;
 		else n = -n;	/* x == -1: continue with the positive exponent */
 	}
 	/* The second operand is true only for n != 0 with x == -1, i.e. the
 	   case that was redirected to the loop above. */
 	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
 		u = n;
 		for(pow = 1; ; ) {
 			if(u & 01) pow *= x;	/* exponent bit set: multiply in */
 			if(u >>= 1) x *= x;	/* more bits left: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Locate the maximum of w[s-1] .. w[e-1], where s and e are 1-based bounds.
  * The result is the 1-based offset of that element counted from s; the first
  * of several equal maxima wins.  The parameter n is ignored. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	double largest = w[s-1];
 	integer where = s;
 	integer pos = s + 1;
 	while (pos <= e) {
 		if (w[pos-1] > largest) {
 			where = pos;
 			largest = w[pos-1];
 		}
 		pos++;
 	}
 	return where - s + 1;
 }
 /* Locate the maximum of w[s-1] .. w[e-1], where s and e are 1-based bounds.
  * The result is the 1-based offset of that element counted from s; the first
  * of several equal maxima wins.  The parameter n is ignored. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	float largest = w[s-1];
 	integer where = s;
 	integer pos = s + 1;
 	while (pos <= e) {
 		if (w[pos-1] > largest) {
 			where = pos;
 			largest = w[pos-1];
 		}
 		pos++;
 	}
 	return where - s + 1;
 }
 /* Single-precision complex dot product with the first vector conjugated:
  * *z = sum over i in [0, *n_) of conj(x[i * *incx_]) * y[i * *incy_]. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer count = *n_, sx = *incx_, sy = *incy_;
 	_Complex float total = 0.0;
 	integer i = 0;
 	/* Unit strides are just the sx == sy == 1 case of the indexed form. */
 	while (i < count) {
 		total += conjf(Cf(&x[i*sx])) * Cf(&y[i*sy]);
 		i++;
 	}
 	pCf(z) = total;
 }
 /* Double-precision complex dot product with the first vector conjugated:
  * *z = sum over i in [0, *n_) of conj(x[i * *incx_]) * y[i * *incy_]. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer count = *n_, sx = *incx_, sy = *incy_;
 	_Complex double total = 0.0;
 	integer i = 0;
 	/* Unit strides are just the sx == sy == 1 case of the indexed form. */
 	while (i < count) {
 		total += conj(Cd(&x[i*sx])) * Cd(&y[i*sy]);
 		i++;
 	}
 	pCd(z) = total;
 }
 /* Unconjugated dot product of two single-precision complex vectors:
      *z = sum over k of x_k * y_k,   k = 0 .. *n_ - 1
    where consecutive elements of x and y are *incx_ and *incy_ entries
    apart.  A negative increment walks its vector backwards starting from
    the far end (the reference BLAS convention) instead of indexing in
    front of the array.  *n_ <= 0 stores zero. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float acc = 0.0;
 	/* first element visited: offset 0 for a forward stride, otherwise
 	   (1-n)*inc so that every later offset stays inside the vector */
 	ix = incx < 0 ? (1 - n) * incx : 0;
 	iy = incy < 0 ? (1 - n) * incy : 0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);	/* no conjugation */
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product of two double-precision complex vectors:
      *z = sum over k of x_k * y_k,   k = 0 .. *n_ - 1
    where consecutive elements of x and y are *incx_ and *incy_ entries
    apart.  A negative increment walks its vector backwards starting from
    the far end (the reference BLAS convention) instead of indexing in
    front of the array.  *n_ <= 0 stores zero. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double acc = 0.0;
 	/* first element visited: offset 0 for a forward stride, otherwise
 	   (1-n)*inc so that every later offset stays inside the vector */
 	ix = incx < 0 ? (1 - n) * incx : 0;
 	iy = incy < 0 ? (1 - n) * incy : 0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);	/* no conjugation */
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static real c_b10 = -1.f;

 /* > \brief \b SGETC2 computes the LU factorization with complete pivoting of the general n-by-n matrix. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGETC2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgetc2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgetc2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgetc2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGETC2( N, A, LDA, IPIV, JPIV, INFO ) */

 /*       INTEGER            INFO, LDA, N */
 /*       INTEGER            IPIV( * ), JPIV( * ) */
 /*       REAL               A( LDA, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGETC2 computes an LU factorization with complete pivoting of the */
 /* > n-by-n matrix A. The factorization has the form A = P * L * U * Q, */
 /* > where P and Q are permutation matrices, L is lower triangular with */
 /* > unit diagonal elements and U is upper triangular. */
 /* > */
 /* > This is the Level 2 BLAS algorithm. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA, N) */
 /* >          On entry, the n-by-n matrix A to be factored. */
 /* >          On exit, the factors L and U from the factorization */
 /* >          A = P*L*U*Q; the unit diagonal elements of L are not stored. */
 /* >          If U(k, k) appears to be less than SMIN, U(k, k) is given the */
 /* >          value of SMIN, i.e., giving a nonsingular perturbed system. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension(N). */
 /* >          The pivot indices; for 1 <= i <= N, row i of the */
 /* >          matrix has been interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] JPIV */
 /* > \verbatim */
 /* >          JPIV is INTEGER array, dimension(N). */
 /* >          The pivot indices; for 1 <= j <= N, column j of the */
 /* >          matrix has been interchanged with column JPIV(j). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >           = 0: successful exit */
 /* >           > 0: if INFO = k, U(k, k) is likely to produce overflow if */
 /* >                we try to solve for x in Ax = b. So U is perturbed to */
 /* >                avoid the overflow. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2016 */

 /* > \ingroup realGEauxiliary */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Bo Kagstrom and Peter Poromaa, Department of Computing Science, */
 /* >     Umea University, S-901 87 Umea, Sweden. */

 /*  ===================================================================== */
 /* Subroutine */ int sgetc2_(integer *n, real *a, integer *lda, integer *ipiv,
 	 integer *jpiv, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    real r__1;

    /* Local variables */
    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 
 	    integer *, real *, integer *, real *, integer *);
    /* smin is assigned during the first elimination step; the initializer */
    /* only keeps it defined when that step never runs. */
    real smin = 0.f, xmax;
    integer i__, j;
    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
 	    integer *), slabad_(real *, real *);
    integer ip, jp;
    extern real slamch_(char *);
    real bignum, smlnum, eps;
    integer ipv, jpv;


 /*  -- LAPACK auxiliary routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2016 */


 /*  ===================================================================== */


    /* Parameter adjustments */
    /* Shift the pointers so that a[i + j * a_dim1], ipiv[i] and jpiv[i] */
    /* can be addressed with 1-based indices. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    --jpiv;

    /* Function Body */
    *info = 0;

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Set constants to control overflow */

    eps = slamch_("P");
    smlnum = slamch_("S") / eps;
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);

 /*     Handle the case N=1 by itself */

    if (*n == 1) {
 	ipiv[1] = 1;
 	jpiv[1] = 1;
 	if ((r__1 = a[a_dim1 + 1], abs(r__1)) < smlnum) {
 	    *info = 1;
 	    a[a_dim1 + 1] = smlnum;
 	}
 	return 0;
    }

 /*     Factorize A using complete pivoting. */
 /*     Set pivots less than SMIN to SMIN. */

    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {

 /*        Find max element in matrix A */

 /*        Start from the diagonal entry: when every remaining entry is */
 /*        NaN no comparison below succeeds, and the pivot indices must */
 /*        still be valid for the swaps that follow. */

 	ipv = i__;
 	jpv = i__;
 	xmax = 0.f;
 	i__2 = *n;
 	for (ip = i__; ip <= i__2; ++ip) {
 	    i__3 = *n;
 	    for (jp = i__; jp <= i__3; ++jp) {
 		if ((r__1 = a[ip + jp * a_dim1], abs(r__1)) >= xmax) {
 		    xmax = (r__1 = a[ip + jp * a_dim1], abs(r__1));
 		    ipv = ip;
 		    jpv = jp;
 		}
 /* L10: */
 	    }
 /* L20: */
 	}

 /*        The pivot threshold is fixed once, from the largest entry of */
 /*        the original matrix. */

 	if (i__ == 1) {
 /* Computing MAX */
 	    r__1 = eps * xmax;
 	    smin = f2cmax(r__1,smlnum);
 	}

 /*        Swap rows */

 	if (ipv != i__) {
 	    sswap_(n, &a[ipv + a_dim1], lda, &a[i__ + a_dim1], lda);
 	}
 	ipiv[i__] = ipv;

 /*        Swap columns */

 	if (jpv != i__) {
 	    sswap_(n, &a[jpv * a_dim1 + 1], &c__1, &a[i__ * a_dim1 + 1], &
 		    c__1);
 	}
 	jpiv[i__] = jpv;

 /*        Check for singularity */

 	if ((r__1 = a[i__ + i__ * a_dim1], abs(r__1)) < smin) {
 	    *info = i__;
 	    a[i__ + i__ * a_dim1] = smin;
 	}

 /*        Divide the entries below the pivot by the pivot */

 	i__2 = *n;
 	for (j = i__ + 1; j <= i__2; ++j) {
 	    a[j + i__ * a_dim1] /= a[i__ + i__ * a_dim1];
 /* L30: */
 	}

 /*        Update the trailing block through sger_ with scalar c_b10 (-1) */

 	i__2 = *n - i__;
 	i__3 = *n - i__;
 	sger_(&i__2, &i__3, &c_b10, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[i__ 
 		+ (i__ + 1) * a_dim1], lda, &a[i__ + 1 + (i__ + 1) * a_dim1], 
 		lda);
 /* L40: */
    }

 /*     The last diagonal entry is never pivoted; only test its size */

    if ((r__1 = a[*n + *n * a_dim1], abs(r__1)) < smin) {
 	*info = *n;
 	a[*n + *n * a_dim1] = smin;
    }

 /*     Set last pivots to N */

    ipiv[*n] = *n;
    jpiv[*n] = *n;

    return 0;

 /*     End of SGETC2 */

 } /* sgetc2_ */

--- a/lapack-netlib/SRC/sgetf2.c
+++ b/lapack-netlib/SRC/sgetf2.c
@@ -0,0 +1,618 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar type aliases used by the translated routines. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers stored as a pair of real members: r, then i. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd build a C99 complex value from the r and i members of *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd cast the struct pointer to a pointer to the C99 complex type. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf(z)/pCd(z) dereference that pointer, so they can be assigned to. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Logical and small-integer aliases. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Base integer aliases used by the I/O parameter records below; ftnlen is */
 /* also the type of the length argument in the xerbla_ declaration. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* One record type per kind of I/O statement, as labelled on each struct. */
 /* NOTE(review): the numerical routine in this file does not reference */
 /* these record types; presumably they are kept only so the embedded */
 /* header stays complete - confirm before removing. */

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Most string members are paired with a length member of the same stem. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 /* Alias for the void keyword. */
 #define VOID void

 /* Union with one member for each scalar and complex type declared above. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Describes one variable by name, address, dimensions and a type code. */
 /* NOTE(review): the encoding of `dims` and `type` is not shown in this */
 /* file - consult the f2c runtime documentation before relying on it. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* ---- Generic arithmetic helpers.  Arguments may be evaluated more than
    once, so they must be free of side effects. ---- */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* ---- Stand-ins for f2c runtime entry points.  Pointer arguments follow
    the by-reference calling convention of the translated code. ---- */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))

 /* ---- Powers.  The *_ui helpers are the static functions defined after
    this macro section. ---- */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* the exponent is a doublecomplex pointer like the base, so it must be
    converted with Cd(); dereferencing it yields a struct that cpow cannot
    accept */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}

 /* ---- Character helpers ---- */
 /* s_cat: concatenates *np pieces rpp[i] (lengths rnp[i]) into lpp, which
    holds llp characters; the remainder is filled with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copies at most min(C,D) characters and stops at a NUL in B */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }

 /* ---- Program termination ---- */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}

 /* ---- Loop-control and intrinsic shims ---- */
 #define myexit_() break;
 #define mycycle() continue;
 /* The next three expand to parenthesised expressions so that they can be
    used as values; a brace-enclosed expansion is only accepted as a
    declaration initializer. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning logical.  The C++ branch declares
    it with a variadic parameter list, the C branch with an unspecified one. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Raises the single-precision value x to the integer power n using binary
    (square-and-multiply) exponentiation; target of the pow_si and pow_ri
    macros.  n == 0 yields 1; for a negative n the base is inverted and the
    magnitude of n is used. */
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* take |n| in unsigned arithmetic: -n would overflow a
 			   signed integer when n is the most negative value */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Raises the double-precision value x to the integer power n using binary
    (square-and-multiply) exponentiation; target of the pow_di macro.
    n == 0 yields 1; for a negative n the base is inverted and the
    magnitude of n is used. */
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* take |n| in unsigned arithmetic: -n would overflow a
 			   signed integer when n is the most negative value */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Raises the single-precision complex value x to the integer power n using
    binary (square-and-multiply) exponentiation; target of the pow_ci
    macro.  n == 0 yields 1; for a negative n the base is inverted and the
    magnitude of n is used. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* take |n| in unsigned arithmetic: -n would overflow a
 			   signed integer when n is the most negative value */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Raises the double-precision complex value x to the integer power n using
    binary (square-and-multiply) exponentiation; target of the pow_zi
    macro.  n == 0 yields 1; for a negative n the base is inverted and the
    magnitude of n is used. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* take |n| in unsigned arithmetic: -n would overflow a
 			   signed integer when n is the most negative value */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits remain: square the base */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Integer power x**n with an integer result.
      n == 0 or x == 1      -> 1
      n < 0 and x == -1     -> 1 or -1 according to the parity of n
      n < 0 and x == 0      -> evaluates 1/x (integer division by zero)
      n < 0, any other x    -> 0 (the reciprocal truncates to zero)
      n > 0                 -> square-and-multiply product */
 static integer pow_ii(integer x, integer n) {
 	integer pow = 1; unsigned long int u;
 	if (n == 0 || x == 1) return 1;
 	if (n < 0) {
 		/* NOTE(review): 1/x with x == 0 divides by zero; kept from the
 		   original, presumably so 0**negative faults - confirm */
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* (-1)**n depends only on parity, so n is never negated
 		   (negation overflows for the most negative integer) */
 		return (n % 2 != 0) ? -1 : 1;
 	}
 	for(u = (unsigned long int)n; ; ) {
 		if(u & 01) pow *= x;
 		if(u >>= 1) x *= x;
 		else break;
 	}
 	return pow;
 }
 /* Returns the position, counted from 1 at index s, of the first largest
    entry among w[s-1] .. w[e-1]; target of the mymaxloc macro.  The
    argument n is accepted but unused. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k = s + 1;
 	double top = w[s-1];
 	while (k <= e) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Single-precision counterpart of dmaxloc_: position, counted from 1 at
    index s, of the first largest entry among w[s-1] .. w[e-1].  The
    argument n is accepted but unused. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k = s + 1;
 	float top = w[s-1];
 	while (k <= e) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product of two single-precision complex vectors:
      *z = sum over k of conj(x_k) * y_k,   k = 0 .. *n_ - 1
    where consecutive elements of x and y are *incx_ and *incy_ entries
    apart.  A negative increment walks its vector backwards starting from
    the far end (the reference BLAS convention) instead of indexing in
    front of the array.  *n_ <= 0 stores zero. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float acc = 0.0;
 	/* first element visited: offset 0 for a forward stride, otherwise
 	   (1-n)*inc so that every later offset stays inside the vector */
 	ix = incx < 0 ? (1 - n) * incx : 0;
 	iy = incy < 0 ? (1 - n) * incy : 0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product of two double-precision complex vectors:
      *z = sum over k of conj(x_k) * y_k,   k = 0 .. *n_ - 1
    where consecutive elements of x and y are *incx_ and *incy_ entries
    apart.  A negative increment walks its vector backwards starting from
    the far end (the reference BLAS convention) instead of indexing in
    front of the array.  *n_ <= 0 stores zero. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double acc = 0.0;
 	/* first element visited: offset 0 for a forward stride, otherwise
 	   (1-n)*inc so that every later offset stays inside the vector */
 	ix = incx < 0 ? (1 - n) * incx : 0;
 	iy = incy < 0 ? (1 - n) * incy : 0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 	}
 	pCd(z) = acc;
 }
 /* Unconjugated dot product of two single-precision complex vectors:
      *z = sum over k of x_k * y_k,   k = 0 .. *n_ - 1
    where consecutive elements of x and y are *incx_ and *incy_ entries
    apart.  A negative increment walks its vector backwards starting from
    the far end (the reference BLAS convention) instead of indexing in
    front of the array.  *n_ <= 0 stores zero. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float acc = 0.0;
 	/* first element visited: offset 0 for a forward stride, otherwise
 	   (1-n)*inc so that every later offset stays inside the vector */
 	ix = incx < 0 ? (1 - n) * incx : 0;
 	iy = incy < 0 ? (1 - n) * incy : 0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);	/* no conjugation */
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product of two double-precision complex vectors:
      *z = sum over k of x_k * y_k,   k = 0 .. *n_ - 1
    where consecutive elements of x and y are *incx_ and *incy_ entries
    apart.  A negative increment walks its vector backwards starting from
    the far end (the reference BLAS convention) instead of indexing in
    front of the array.  *n_ <= 0 stores zero. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double acc = 0.0;
 	/* first element visited: offset 0 for a forward stride, otherwise
 	   (1-n)*inc so that every later offset stays inside the vector */
 	ix = incx < 0 ? (1 - n) * incx : 0;
 	iy = incy < 0 ? (1 - n) * incy : 0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);	/* no conjugation */
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static real c_b8 = -1.f;

 /* > \brief \b SGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row
 interchanges (unblocked algorithm). */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGETF2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgetf2.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgetf2.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgetf2.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGETF2( M, N, A, LDA, IPIV, INFO ) */

 /*       INTEGER            INFO, LDA, M, N */
 /*       INTEGER            IPIV( * ) */
 /*       REAL               A( LDA, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGETF2 computes an LU factorization of a general m-by-n matrix A */
 /* > using partial pivoting with row interchanges. */
 /* > */
 /* > The factorization has the form */
 /* >    A = P * L * U */
 /* > where P is a permutation matrix, L is lower triangular with unit */
 /* > diagonal elements (lower trapezoidal if m > n), and U is upper */
 /* > triangular (upper trapezoidal if m < n). */
 /* > */
 /* > This is the right-looking Level 2 BLAS version of the algorithm. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the m by n matrix to be factored. */
 /* >          On exit, the factors L and U from the factorization */
 /* >          A = P*L*U; the unit diagonal elements of L are not stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (min(M,N)) */
 /* >          The pivot indices; for 1 <= i <= min(M,N), row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -k, the k-th argument had an illegal value */
 /* >          > 0: if INFO = k, U(k,k) is exactly zero. The factorization */
 /* >               has been completed, but the factor U is exactly */
 /* >               singular, and division by zero will occur if it is used */
 /* >               to solve a system of equations. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int sgetf2_(integer *m, integer *n, real *a, integer *lda, 
 	integer *ipiv, integer *info)
 {
    /* External routines */
    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 
 	    integer *, real *, integer *, real *, integer *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
 	    integer *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern real slamch_(char *);
    extern integer isamax_(integer *, real *, integer *);

    /* Locals */
    integer ld, col, row, piv_row, steps, count, ncols, argno;
    real tiny, diag, recip;

 /*     Unblocked LU factorization with partial pivoting (row */
 /*     interchanges).  On return *info is 0, the negated position of */
 /*     the first invalid argument, or the first column whose pivot */
 /*     was exactly zero. */

 /*     Shift both arrays so that a[i + j * ld] and ipiv[i] can be */
 /*     addressed with 1-based indices. */

    ld = *lda;
    a -= 1 + ld * 1;
    --ipiv;

 /*     Validate the arguments; the first failing test decides *info. */

    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info != 0) {
 	argno = -(*info);
 	xerbla_("SGETF2", &argno, (ftnlen)6);
 	return 0;
    }

 /*     Nothing to do for an empty matrix. */

    if (*m == 0 || *n == 0) {
 	return 0;
    }

 /*     Threshold below which the pivot reciprocal is not formed. */

    tiny = slamch_("S");

    steps = f2cmin(*m,*n);
    for (col = 1; col <= steps; ++col) {

 /*        Locate the pivot in rows col..m of the current column. */

 	count = *m - col + 1;
 	piv_row = col - 1 + isamax_(&count, &a[col + col * ld], &c__1);
 	ipiv[col] = piv_row;

 	if (a[piv_row + col * ld] == 0.f) {

 /*           Zero pivot: remember only the first such column. */

 	    if (*info == 0) {
 		*info = col;
 	    }

 	} else {

 /*           Bring the pivot row into place across all n columns. */

 	    if (piv_row != col) {
 		sswap_(n, &a[col + ld], lda, &a[piv_row + ld], lda);
 	    }

 /*           Divide the entries below the diagonal by the pivot. */

 	    if (col < *m) {
 		diag = a[col + col * ld];
 		count = *m - col;
 		if (abs(diag) >= tiny) {
 		    recip = 1.f / a[col + col * ld];
 		    sscal_(&count, &recip, &a[col + 1 + col * ld], &c__1);
 		} else {
 		    for (row = col + 1; row <= col + count; ++row) {
 			a[row + col * ld] /= a[col + col * ld];
 		    }
 		}
 	    }
 	}

 /*        Update the trailing block through sger_ with scalar c_b8 (-1). */

 	if (col < steps) {
 	    count = *m - col;
 	    ncols = *n - col;
 	    sger_(&count, &ncols, &c_b8, &a[col + 1 + col * ld], &c__1,
 		    &a[col + (col + 1) * ld], lda,
 		    &a[col + 1 + (col + 1) * ld], lda);
 	}
    }
    return 0;

 /*     End of SGETF2 */

 } /* sgetf2_ */

--- a/lapack-netlib/SRC/sgetrf.c
+++ b/lapack-netlib/SRC/sgetrf.c
@@ -0,0 +1,643 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Build a C99 single-precision complex value from the r/i fields of *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 /* Build a C99 double-precision complex value from the r/i fields of *z. */
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* Reinterpret a pointer to the {r,i} struct as a pointer to _Complex float. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 /* Reinterpret a pointer to the {r,i} struct as a pointer to _Complex double. */
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* Lvalue views of *z as a C99 complex number, used as assignment targets by the macros below. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* R = A ** B for double-complex operands; all three arguments are pointers to doublecomplex. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 /* Expression macros: parenthesized so they can appear on the right-hand side
    of an assignment or inside a larger expression. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* x**n for a float base and an integer exponent, by binary exponentiation.
    A negative n inverts x first; n == 0 yields 1. */
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* negate in unsigned arithmetic: -n overflows for the most negative integer */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current bit set: fold this power of x in */
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 /* x**n for a double base and an integer exponent, by binary exponentiation.
    A negative n inverts x first; n == 0 yields 1. */
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* negate in unsigned arithmetic: -n overflows for the most negative integer */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current bit set: fold this power of x in */
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 /* x**n for a single-precision complex base and an integer exponent, by
    binary exponentiation. A negative n inverts x first; n == 0 yields 1. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* negate in unsigned arithmetic: -n overflows for the most negative integer */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current bit set: fold this power of x in */
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 /* x**n for a double-precision complex base and an integer exponent, by
    binary exponentiation. A negative n inverts x first; n == 0 yields 1. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* negate in unsigned arithmetic: -n overflows for the most negative integer */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current bit set: fold this power of x in */
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 /* x**n for integer operands.
      n == 0            -> 1
      n <  0, x ==  1   -> 1
      n <  0, x == -1   -> 1 or -1 according to the parity of n
      n <  0, x ==  0   -> evaluates 1/x, i.e. an integer division by zero
      n <  0, otherwise -> 0
      n >  0            -> binary exponentiation */
 static integer pow_ii(integer x, integer n) {
 	integer pow = 1; unsigned long int u;
 	if (n == 0) return 1;
 	if (n < 0) {
 		if (x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* x == -1: only |n| matters; negate in unsigned arithmetic
 		   because -n overflows for the most negative integer */
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for( ; ; ) {
 		if(u & 01) pow *= x;
 		if(u >>= 1) x *= x;
 		else break;
 	}
 	return pow;
 }
 /* Position of the first largest entry among w(s..e) (1-based bounds),
    counted from 1 at index s. The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the first largest entry among w(s..e) (1-based bounds),
    counted from 1 at index s. The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum over n elements of conj(x) * y,
    stepping through x by *incx_ and through y by *incy_. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		/* BLAS convention: a negative increment walks the vector backwards
 		   from its last element, so start at (1-n)*inc instead of 0 */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product: *z = sum over n elements of conj(x) * y,
    stepping through x by *incx_ and through y by *incy_. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		/* BLAS convention: a negative increment walks the vector backwards
 		   from its last element, so start at (1-n)*inc instead of 0 */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated dot product: *z = sum over n elements of x * y,
    stepping through x by *incx_ and through y by *incy_. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		/* BLAS convention: a negative increment walks the vector backwards
 		   from its last element, so start at (1-n)*inc instead of 0 */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Unconjugated dot product: *z = sum over n elements of x * y,
    stepping through x by *incx_ and through y by *incy_. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		/* BLAS convention: a negative increment walks the vector backwards
 		   from its last element, so start at (1-n)*inc instead of 0 */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static integer c_n1 = -1;
 static real c_b16 = 1.f;
 static real c_b19 = -1.f;

 /* > \brief \b SGETRF */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGETRF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgetrf.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgetrf.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgetrf.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGETRF( M, N, A, LDA, IPIV, INFO ) */

 /*       INTEGER            INFO, LDA, M, N */
 /*       INTEGER            IPIV( * ) */
 /*       REAL               A( LDA, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGETRF computes an LU factorization of a general M-by-N matrix A */
 /* > using partial pivoting with row interchanges. */
 /* > */
 /* > The factorization has the form */
 /* >    A = P * L * U */
 /* > where P is a permutation matrix, L is lower triangular with unit */
 /* > diagonal elements (lower trapezoidal if m > n), and U is upper */
 /* > triangular (upper trapezoidal if m < n). */
 /* > */
 /* > This is the right-looking Level 3 BLAS version of the algorithm. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix to be factored. */
 /* >          On exit, the factors L and U from the factorization */
 /* >          A = P*L*U; the unit diagonal elements of L are not stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (min(M,N)) */
 /* >          The pivot indices; for 1 <= i <= min(M,N), row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization */
 /* >                has been completed, but the factor U is exactly */
 /* >                singular, and division by zero will occur if it is used */
 /* >                to solve a system of equations. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int sgetrf_(integer *m, integer *n, real *a, integer *lda, 
 	integer *ipiv, integer *info)
 {
     /* LU factorization driver: validates the arguments, asks ilaenv_ for a
        block size, then either factors the whole matrix with one sgetrf2_
        call or sweeps over column panels of width nb, factoring each panel
        with sgetrf2_ and updating the remaining columns with slaswp_,
        strsm_ and sgemm_.  Always returns 0; status is reported in *info. */

     /* Column stride of A and the offset that makes a[i + j*a_dim1] 1-based */
     integer a_dim1, a_offset;

     /* Scratch integers whose addresses are passed to the callees */
     integer bad_arg, rows, cols, k_last;

     /* Loop state */
     integer k, j, jb, nb, mn, row_end, iinfo;

     extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
 	    integer *, real *, real *, integer *, real *, integer *, real *, 
 	    real *, integer *), strsm_(char *, char *, char *,
 	     char *, integer *, integer *, real *, real *, integer *, real *, 
 	    integer *);
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
     extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
     extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer 
 	    *, integer *, integer *, integer *), sgetrf2_(integer *, integer *
 	    , real *, integer *, integer *, integer *);

     /* Shift the base pointers so Fortran-style 1-based indices can be used */
     a_dim1 = *lda;
     a_offset = 1 + a_dim1;
     a -= a_offset;
     --ipiv;

     /* Validate the arguments; only the first violation is reported */
     *info = 0;
     if (*m < 0) {
         *info = -1;
     } else if (*n < 0) {
         *info = -2;
     } else if (*lda < f2cmax(1,*m)) {
         *info = -4;
     }
     if (*info != 0) {
         bad_arg = -(*info);
         xerbla_("SGETRF", &bad_arg, (ftnlen)6);
         return 0;
     }

     /* Nothing to do for an empty matrix */
     if (*m == 0 || *n == 0) {
         return 0;
     }

     /* Block size for this environment */
     nb = ilaenv_(&c__1, "SGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
 	    1);
     mn = f2cmin(*m,*n);

     if (nb <= 1 || nb >= mn) {
         /* Block size is unusable: factor the whole matrix in one call */
         sgetrf2_(m, n, &a[a_offset], lda, &ipiv[1], info);
         return 0;
     }

     /* Blocked path: nb > 1 here, so j advances by a positive step */
     for (j = 1; j <= mn; j += nb) {
         jb = f2cmin(mn - j + 1, nb);

         /* Factor the diagonal and subdiagonal blocks of this panel */
         rows = *m - j + 1;
         sgetrf2_(&rows, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo);

         /* Keep the first reported singularity, as an index into the full matrix */
         if (*info == 0 && iinfo > 0) {
             *info = iinfo + j - 1;
         }

         /* Panel pivots are relative to row j; make them relative to row 1 */
         k_last = j + jb - 1;
         row_end = f2cmin(*m, k_last);
         for (k = j; k <= row_end; ++k) {
             ipiv[k] += j - 1;
         }

         /* Apply the interchanges to columns 1:j-1 */
         cols = j - 1;
         slaswp_(&cols, &a[a_offset], lda, &j, &k_last, &ipiv[1], &c__1);

         if (j + jb <= *n) {
             /* Apply the interchanges to columns j+jb:n */
             cols = *n - j - jb + 1;
             slaswp_(&cols, &a[(j + jb) * a_dim1 + 1], lda, &j, &k_last, &
 		    ipiv[1], &c__1);

             /* Compute the block row of U */
             strsm_("Left", "Lower", "No transpose", "Unit", &jb, &cols, &
 		    c_b16, &a[j + j * a_dim1], lda, &a[j + (j + jb) * 
 		    a_dim1], lda);

             if (j + jb <= *m) {
                 /* Update the trailing submatrix */
                 rows = *m - j - jb + 1;
                 sgemm_("No transpose", "No transpose", &rows, &cols, &jb, 
 			&c_b19, &a[j + jb + j * a_dim1], lda, &a[j + (j + 
 			jb) * a_dim1], lda, &c_b16, &a[j + jb + (j + jb) *
 			 a_dim1], lda);
             }
         }
     }
     return 0;

 /*     End of SGETRF */

 } /* sgetrf_ */

--- a/lapack-netlib/SRC/sgetrf2.c
+++ b/lapack-netlib/SRC/sgetrf2.c
@@ -0,0 +1,681 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Build a C99 single-precision complex value from the r/i fields of *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 /* Build a C99 double-precision complex value from the r/i fields of *z. */
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* Reinterpret a pointer to the {r,i} struct as a pointer to _Complex float. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 /* Reinterpret a pointer to the {r,i} struct as a pointer to _Complex double. */
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* Lvalue views of *z as a C99 complex number, used as assignment targets by the macros below. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* R = A ** B for double-complex operands; all three arguments are pointers to doublecomplex. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 /* Expression macros: parenthesized so they can appear on the right-hand side
    of an assignment or inside a larger expression. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* x**n for a float base and an integer exponent, by binary exponentiation.
    A negative n inverts x first; n == 0 yields 1. */
 static float spow_ui(float x, integer n) {
 	float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* negate in unsigned arithmetic: -n overflows for the most negative integer */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current bit set: fold this power of x in */
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 /* x**n for a double base and an integer exponent, by binary exponentiation.
    A negative n inverts x first; n == 0 yields 1. */
 static double dpow_ui(double x, integer n) {
 	double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* negate in unsigned arithmetic: -n overflows for the most negative integer */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current bit set: fold this power of x in */
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 /* x**n for a single-precision complex base and an integer exponent, by
    binary exponentiation. A negative n inverts x first; n == 0 yields 1. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* negate in unsigned arithmetic: -n overflows for the most negative integer */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current bit set: fold this power of x in */
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 /* x**n for a double-precision complex base and an integer exponent, by
    binary exponentiation. A negative n inverts x first; n == 0 yields 1. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* negate in unsigned arithmetic: -n overflows for the most negative integer */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* current bit set: fold this power of x in */
 			if(u >>= 1) x *= x;
 			else break;
 		}
 	}
 	return pow;
 }
 /* x**n for integer operands.
      n == 0            -> 1
      n <  0, x ==  1   -> 1
      n <  0, x == -1   -> 1 or -1 according to the parity of n
      n <  0, x ==  0   -> evaluates 1/x, i.e. an integer division by zero
      n <  0, otherwise -> 0
      n >  0            -> binary exponentiation */
 static integer pow_ii(integer x, integer n) {
 	integer pow = 1; unsigned long int u;
 	if (n == 0) return 1;
 	if (n < 0) {
 		if (x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* x == -1: only |n| matters; negate in unsigned arithmetic
 		   because -n overflows for the most negative integer */
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for( ; ; ) {
 		if(u & 01) pow *= x;
 		if(u >>= 1) x *= x;
 		else break;
 	}
 	return pow;
 }
 /* Position of the first largest entry among w(s..e) (1-based bounds),
    counted from 1 at index s. The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the first largest entry among w(s..e) (1-based bounds),
    counted from 1 at index s. The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum over n elements of conj(x) * y,
    stepping through x by *incx_ and through y by *incy_. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		/* BLAS convention: a negative increment walks the vector backwards
 		   from its last element, so start at (1-n)*inc instead of 0 */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product: *z = sum over n elements of conj(x) * y,
    stepping through x by *incx_ and through y by *incy_. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		/* BLAS convention: a negative increment walks the vector backwards
 		   from its last element, so start at (1-n)*inc instead of 0 */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated dot product: *z = sum over n elements of x * y,
    stepping through x by *incx_ and through y by *incy_. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		/* BLAS convention: a negative increment walks the vector backwards
 		   from its last element, so start at (1-n)*inc instead of 0 */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Unconjugated dot product: *z = sum over n elements of x * y,
    stepping through x by *incx_ and through y by *incy_. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		/* BLAS convention: a negative increment walks the vector backwards
 		   from its last element, so start at (1-n)*inc instead of 0 */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static real c_b13 = 1.f;
 static real c_b16 = -1.f;

 /* > \brief \b SGETRF2 */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /*  Definition: */
 /*  =========== */

 /*        SUBROUTINE SGETRF2( M, N, A, LDA, IPIV, INFO ) */

 /*       INTEGER            INFO, LDA, M, N */
 /*       INTEGER            IPIV( * ) */
 /*       REAL               A( LDA, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGETRF2 computes an LU factorization of a general M-by-N matrix A */
 /* > using partial pivoting with row interchanges. */
 /* > */
 /* > The factorization has the form */
 /* >    A = P * L * U */
 /* > where P is a permutation matrix, L is lower triangular with unit */
 /* > diagonal elements (lower trapezoidal if m > n), and U is upper */
 /* > triangular (upper trapezoidal if m < n). */
 /* > */
 /* > This is the recursive version of the algorithm. It divides */
 /* > the matrix into four submatrices: */
 /* > */
 /* >        [  A11 | A12  ]  where A11 is n1 by n1 and A22 is n2 by n2 */
 /* >    A = [ -----|----- ]  with n1 = min(m,n)/2 */
 /* >        [  A21 | A22  ]       n2 = n-n1 */
 /* > */
 /* >                                       [ A11 ] */
 /* > The subroutine calls itself to factor [ --- ], */
 /* >                                       [ A21 ] */
 /* >                 [ A12 ] */
 /* > do the swaps on [ --- ], solve A12, update A22, */
 /* >                 [ A22 ] */
 /* > */
 /* > then calls itself to factor A22 and do the swaps on A21. */
 /* > */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix to be factored. */
 /* >          On exit, the factors L and U from the factorization */
 /* >          A = P*L*U; the unit diagonal elements of L are not stored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (min(M,N)) */
 /* >          The pivot indices; for 1 <= i <= min(M,N), row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization */
 /* >                has been completed, but the factor U is exactly */
 /* >                singular, and division by zero will occur if it is used */
 /* >                to solve a system of equations. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2016 */

 /* > \ingroup realGEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int sgetrf2_(integer *m, integer *n, real *a, integer *lda, 
 	integer *ipiv, integer *info)
 {
 /*     Recursive LU factorization with row interchanges. A matrix with a */
 /*     single row or a single column is handled directly; anything larger */
 /*     is split into a left panel of n1 = min(m,n)/2 columns and a right */
 /*     panel of n2 = n - n1 columns, and the routine calls itself on each. */
 /*     The C return value is always 0; status is reported through *info. */

    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    real r__1;

    /* Local variables */
    real temp;
    integer i__, iinfo;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
 	    sgemm_(char *, char *, integer *, integer *, integer *, real *, 
 	    real *, integer *, real *, integer *, real *, real *, integer *);
    real sfmin;
    integer n1, n2;
    extern /* Subroutine */ int strsm_(char *, char *, char *, char *, 
 	    integer *, integer *, real *, real *, integer *, real *, integer *
 	    );
    extern real slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer 
 	    *, integer *, integer *, integer *);


 /*  -- LAPACK computational routine (version 3.7.1) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
 /*     Shift both base pointers so that a[i + j * a_dim1] and ipiv[i] */
 /*     can be addressed with 1-based row/column subscripts below. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;

    /* Function Body */
    *info = 0;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -4;
    }
    if (*info != 0) {
 /*        Report the (positive) position of the offending argument. */
 	i__1 = -(*info);
 	xerbla_("SGETRF2", &i__1, (ftnlen)7);
 	return 0;
    }

 /*     Quick return if possible */

    if (*m == 0 || *n == 0) {
 	return 0;
    }
    if (*m == 1) {

 /*        Use unblocked code for one row case */
 /*        Just need to handle IPIV and INFO */

 	ipiv[1] = 1;
 	if (a[a_dim1 + 1] == 0.f) {
 	    *info = 1;
 	}

    } else if (*n == 1) {

 /*        Use unblocked code for one column case */


 /*        Compute machine safe minimum */

 	sfmin = slamch_("S");

 /*        Find pivot and test for singularity */

 	i__ = isamax_(m, &a[a_dim1 + 1], &c__1);
 	ipiv[1] = i__;
 	if (a[i__ + a_dim1] != 0.f) {

 /*           Apply the interchange */

 	    if (i__ != 1) {
 		temp = a[a_dim1 + 1];
 		a[a_dim1 + 1] = a[i__ + a_dim1];
 		a[i__ + a_dim1] = temp;
 	    }

 /*           Compute elements 2:M of the column */

 /*           When the pivot magnitude is at least sfmin, scale by its */
 /*           reciprocal with one SSCAL call; otherwise divide each */
 /*           element by the pivot individually. */
 	    if ((r__1 = a[a_dim1 + 1], abs(r__1)) >= sfmin) {
 		i__1 = *m - 1;
 		r__1 = 1.f / a[a_dim1 + 1];
 		sscal_(&i__1, &r__1, &a[a_dim1 + 2], &c__1);
 	    } else {
 		i__1 = *m - 1;
 		for (i__ = 1; i__ <= i__1; ++i__) {
 		    a[i__ + 1 + a_dim1] /= a[a_dim1 + 1];
 /* L10: */
 		}
 	    }

 	} else {
 	    *info = 1;
 	}

    } else {

 /*        Use recursive code */

 	n1 = f2cmin(*m,*n) / 2;
 	n2 = *n - n1;

 /*               [ A11 ] */
 /*        Factor [ --- ] */
 /*               [ A21 ] */

 /*        The left panel is all m rows of the first n1 columns. Only a */
 /*        positive iinfo (zero pivot) is propagated to *info. */
 	sgetrf2_(m, &n1, &a[a_offset], lda, &ipiv[1], &iinfo);
 	if (*info == 0 && iinfo > 0) {
 	    *info = iinfo;
 	}

 /*                              [ A12 ] */
 /*        Apply interchanges to [ --- ] */
 /*                              [ A22 ] */

 /*        Rows are swapped in columns n1+1..n using pivots 1..n1. */
 	slaswp_(&n2, &a[(n1 + 1) * a_dim1 + 1], lda, &c__1, &n1, &ipiv[1], &
 		c__1);

 /*        Solve A12 */

 /*        NOTE(review): c_b13 and c_b16 are file-scope constants defined */
 /*        above this routine; presumably 1.f and -1.f respectively, */
 /*        confirm against the constant table. */
 	strsm_("L", "L", "N", "U", &n1, &n2, &c_b13, &a[a_offset], lda, &a[(
 		n1 + 1) * a_dim1 + 1], lda);

 /*        Update A22 */

 	i__1 = *m - n1;
 	sgemm_("N", "N", &i__1, &n2, &n1, &c_b16, &a[n1 + 1 + a_dim1], lda, &
 		a[(n1 + 1) * a_dim1 + 1], lda, &c_b13, &a[n1 + 1 + (n1 + 1) * 
 		a_dim1], lda);

 /*        Factor A22 */

 /*        The trailing block starts at row n1+1, column n1+1 and has */
 /*        m-n1 rows and n2 columns; its pivots go to ipiv[n1+1..]. */
 	i__1 = *m - n1;
 	sgetrf2_(&i__1, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &ipiv[n1 + 
 		1], &iinfo);

 /*        Adjust INFO and the pivot indices */

 /*        Both iinfo and the new pivots are relative to the trailing */
 /*        block, so n1 is added to express them in terms of the whole */
 /*        matrix. A singularity found in the left panel takes precedence. */
 	if (*info == 0 && iinfo > 0) {
 	    *info = iinfo + n1;
 	}
 	i__1 = f2cmin(*m,*n);
 	for (i__ = n1 + 1; i__ <= i__1; ++i__) {
 	    ipiv[i__] += n1;
 /* L20: */
 	}

 /*        Apply interchanges to A21 */

 /*        The pivots n1+1..min(m,n) are applied to the first n1 columns. */
 	i__1 = n1 + 1;
 	i__2 = f2cmin(*m,*n);
 	slaswp_(&n1, &a[a_dim1 + 1], lda, &i__1, &i__2, &ipiv[1], &c__1);

    }
    return 0;

 /*     End of SGETRF2 */

 } /* sgetrf2_ */

--- a/lapack-netlib/SRC/sgetri.c
+++ b/lapack-netlib/SRC/sgetri.c
@@ -0,0 +1,690 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar types used by the translated Fortran code. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex values are stored as a pair of members: r and i. The name */
 /* "complex" is free for this typedef because the <complex.h> macro of */
 /* that name is undefined just above. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd: build a C99 complex value from the r and i members of *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd: cast the struct pointer to a pointer to the C99 complex type */
 /* of the same precision. */
 /* NOTE(review): this relies on the struct and the _Complex type having */
 /* the same layout; confirm for the compilers in use. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf/pCd: dereference the cast pointer, so the result can be assigned */
 /* to, as in pCf(z) = value. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Truth-value and byte-sized types. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Generic helpers written as macros so they work for any arithmetic type. */
 /* Each argument may be evaluated more than once, so do not pass */
 /* expressions with side effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit helpers: b is the bit position; bit_clear and bit_set build the */
 /* mask from an unsigned 1. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shorthands; the trailing semicolon is part of the expansion. */
 #define myexit_() break;
 #define mycycle() continue;
 /* Value-producing helpers. They expand to parenthesized expressions so */
 /* that they can appear in assignments and larger expressions; the former */
 /* brace-wrapped expansions were only accepted as initializers. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 /* Position of the largest element; forwards to dmaxloc_ with the start */
 /* and end bounds dereferenced. */
 #define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Raise x to the integer power n by repeated squaring. */
 /* n == 0 yields 1; a negative n yields (1/x) raised to |n|. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative */
 		/* integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* Square only while exponent bits remain. */
 		x *= x;
 	}
 	return result;
 }
 /* Raise x to the integer power n by repeated squaring. */
 /* n == 0 yields 1; a negative n yields (1/x) raised to |n|. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative */
 		/* integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* Square only while exponent bits remain. */
 		x *= x;
 	}
 	return result;
 }
 /* Raise the complex value x to the integer power n by repeated squaring. */
 /* n == 0 yields 1; a negative n yields (1/x) raised to |n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative */
 		/* integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* Square only while exponent bits remain. */
 		x *= x;
 	}
 	return result;
 }
 /* Raise the complex value x to the integer power n by repeated squaring. */
 /* n == 0 yields 1; a negative n yields (1/x) raised to |n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative */
 		/* integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* Square only while exponent bits remain. */
 		x *= x;
 	}
 	return result;
 }
 /* Integer power x**n with truncating integer semantics: */
 /*   n == 0 or x == 1      -> 1 */
 /*   n < 0 and x == -1     -> +1 or -1 according to the parity of n */
 /*   n < 0, any other x    -> 0, except that x == 0 evaluates 1/x (an */
 /*                            integer division by zero, kept as before) */
 /*   n > 0                 -> repeated squaring */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int bits;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* x == -1: take |n| in unsigned arithmetic, since negating the */
 		/* most negative integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* Square only while exponent bits remain. */
 		x *= x;
 	}
 	return result;
 }
 /* Scan w[s-1] .. w[e-1] and return the position of the first largest */
 /* element, counted from 1 at index s. w[s-1] is always read, so an */
 /* empty range (e < s) returns 1. The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double largest = w[s-1];
 	integer k = s + 1;
 	while (k <= e) {
 		if (w[k-1] > largest) {
 			best = k;
 			largest = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Scan w[s-1] .. w[e-1] and return the position of the first largest */
 /* element, counted from 1 at index s. w[s-1] is always read, so an */
 /* empty range (e < s) returns 1. The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float largest = w[s-1];
 	integer k = s + 1;
 	while (k <= e) {
 		if (w[k-1] > largest) {
 			best = k;
 			largest = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product, single-precision complex: stores into *z the */
 /* sum over k = 0 .. n-1 of conj(x[k*incx]) * y[k*incy]. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer count = *n_;
 	const integer stridex = *incx_;
 	const integer stridey = *incy_;
 	_Complex float sum = 0.0;
 	integer k = 0;
 	/* With unit strides the indices reduce to k, so one loop covers both */
 	/* the contiguous and the strided case. */
 	while (k < count) {
 		sum += conjf(Cf(&x[k*stridex])) * Cf(&y[k*stridey]);
 		++k;
 	}
 	pCf(z) = sum;
 }
 /* Conjugated dot product, double-precision complex: stores into *z the */
 /* sum over k = 0 .. n-1 of conj(x[k*incx]) * y[k*incy]. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer count = *n_;
 	const integer stridex = *incx_;
 	const integer stridey = *incy_;
 	_Complex double sum = 0.0;
 	integer k = 0;
 	/* With unit strides the indices reduce to k, so one loop covers both */
 	/* the contiguous and the strided case. */
 	while (k < count) {
 		sum += conj(Cd(&x[k*stridex])) * Cd(&y[k*stridey]);
 		++k;
 	}
 	pCd(z) = sum;
 }
 /* Unconjugated dot product, single-precision complex: stores into *z the */
 /* sum over k = 0 .. n-1 of x[k*incx] * y[k*incy]. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer count = *n_;
 	const integer stridex = *incx_;
 	const integer stridey = *incy_;
 	_Complex float sum = 0.0;
 	integer k = 0;
 	/* With unit strides the indices reduce to k, so one loop covers both */
 	/* the contiguous and the strided case. */
 	while (k < count) {
 		sum += Cf(&x[k*stridex]) * Cf(&y[k*stridey]);
 		++k;
 	}
 	pCf(z) = sum;
 }
 /* Unconjugated dot product, double-precision complex: stores into *z the */
 /* sum over k = 0 .. n-1 of x[k*incx] * y[k*incy]. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer count = *n_;
 	const integer stridex = *incx_;
 	const integer stridey = *incy_;
 	_Complex double sum = 0.0;
 	integer k = 0;
 	/* With unit strides the indices reduce to k, so one loop covers both */
 	/* the contiguous and the strided case. */
 	while (k < count) {
 		sum += Cd(&x[k*stridex]) * Cd(&y[k*stridey]);
 		++k;
 	}
 	pCd(z) = sum;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* 1: first argument of the initial ILAENV query, and the vector */
 /* increment passed to SGEMV and SSWAP. */
 static integer c__1 = 1;
 /* -1: passed for the three trailing integer arguments of ILAENV. */
 static integer c_n1 = -1;
 /* 2: first argument of the second ILAENV query, whose result feeds nbmin. */
 static integer c__2 = 2;
 /* -1.f: the ALPHA scalar of the SGEMV and SGEMM updates. */
 static real c_b20 = -1.f;
 /* 1.f: the BETA scalar of SGEMV and SGEMM, and the ALPHA scalar of STRSM. */
 static real c_b22 = 1.f;

 /* > \brief \b SGETRI */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGETRI + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgetri.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgetri.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgetri.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) */

 /*       INTEGER            INFO, LDA, LWORK, N */
 /*       INTEGER            IPIV( * ) */
 /*       REAL               A( LDA, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGETRI computes the inverse of a matrix using the LU factorization */
 /* > computed by SGETRF. */
 /* > */
 /* > This method inverts U and then computes inv(A) by solving the system */
 /* > inv(A)*L = inv(U) for inv(A). */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the factors L and U from the factorization */
 /* >          A = P*L*U as computed by SGETRF. */
 /* >          On exit, if INFO = 0, the inverse of the original matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices from SGETRF; for 1<=i<=N, row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO=0, then WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK.  LWORK >= f2cmax(1,N). */
 /* >          For optimal performance LWORK >= N*NB, where NB is */
 /* >          the optimal blocksize returned by ILAENV. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, U(i,i) is exactly zero; the matrix is */
 /* >                singular and its inverse could not be computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int sgetri_(integer *n, real *a, integer *lda, integer *ipiv,
 	 real *work, integer *lwork, integer *info)
 {
 /*     Overwrites the LU factors stored in A with the inverse matrix. */
 /*     STRTRI first inverts U in place; the columns of inv(A) are then */
 /*     formed from the last column to the first, either one at a time */
 /*     (SGEMV) or in blocks of up to nb columns (SGEMM + STRSM), and */
 /*     finally columns are swapped according to IPIV. The C return value */
 /*     is always 0; status is reported through *info. */

    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;

    /* Local variables */
    integer i__, j, nbmin;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
 	    integer *, real *, real *, integer *, real *, integer *, real *, 
 	    real *, integer *), sgemv_(char *, integer *, 
 	    integer *, real *, real *, integer *, real *, integer *, real *, 
 	    real *, integer *), sswap_(integer *, real *, integer *, 
 	    real *, integer *), strsm_(char *, char *, char *, char *, 
 	    integer *, integer *, real *, real *, integer *, real *, integer *
 	    );
    integer jb, nb, jj, jp, nn;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    integer ldwork, lwkopt;
    logical lquery;
    extern /* Subroutine */ int strtri_(char *, char *, integer *, real *, 
 	    integer *, integer *);
    integer iws;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

    /* Parameter adjustments */
 /*     Shift the base pointers so that a[i + j * a_dim1], ipiv[i] and */
 /*     work[i] can be addressed with 1-based subscripts below. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    --work;

    /* Function Body */
    *info = 0;
 /*     The block size and the resulting workspace size n*nb are computed */
 /*     and stored in work[1] before the arguments are validated, so a */
 /*     workspace query (lwork == -1) can return right after the checks. */
    nb = ilaenv_(&c__1, "SGETRI", " ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);
    lwkopt = *n * nb;
    work[1] = (real) lwkopt;
    lquery = *lwork == -1;
    if (*n < 0) {
 	*info = -1;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -3;
    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
 	*info = -6;
    }
    if (*info != 0) {
 /*        Report the (positive) position of the offending argument. */
 	i__1 = -(*info);
 	xerbla_("SGETRI", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    if (*n == 0) {
 	return 0;
    }

 /*     Form inv(U).  If INFO > 0 from STRTRI, then U is singular, */
 /*     and the inverse is not computed. */

    strtri_("Upper", "Non-unit", n, &a[a_offset], lda, info);
    if (*info > 0) {
 	return 0;
    }

 /*     Choose between blocked and unblocked code. iws is the workspace */
 /*     size reported in work[1] on exit. When 1 < nb < n but lwork is */
 /*     smaller than n*nb, nb is reduced to what fits (lwork / n) and */
 /*     nbmin, the smallest block size for which the blocked path is */
 /*     still taken, is raised to at least 2. */
    nbmin = 2;
    ldwork = *n;
    if (nb > 1 && nb < *n) {
 /* Computing MAX */
 	i__1 = ldwork * nb;
 	iws = f2cmax(i__1,1);
 	if (*lwork < iws) {
 	    nb = *lwork / ldwork;
 /* Computing MAX */
 	    i__1 = 2, i__2 = ilaenv_(&c__2, "SGETRI", " ", n, &c_n1, &c_n1, &
 		    c_n1, (ftnlen)6, (ftnlen)1);
 	    nbmin = f2cmax(i__1,i__2);
 	}
    } else {
 	iws = *n;
    }

 /*     Solve the equation inv(A)*L = inv(U) for inv(A). */

    if (nb < nbmin || nb >= *n) {

 /*        Use unblocked code. */

 	for (j = *n; j >= 1; --j) {

 /*           Copy current column of L to WORK and replace with zeros. */

 /*           Only the entries below the diagonal (rows j+1..n) are moved. */
 	    i__1 = *n;
 	    for (i__ = j + 1; i__ <= i__1; ++i__) {
 		work[i__] = a[i__ + j * a_dim1];
 		a[i__ + j * a_dim1] = 0.f;
 /* L10: */
 	    }

 /*           Compute current column of inv(A). */

 /*           Column j is updated with columns j+1..n multiplied by the */
 /*           saved entries work[j+1..n], scaled by c_b20 (-1.f). */
 	    if (j < *n) {
 		i__1 = *n - j;
 		sgemv_("No transpose", n, &i__1, &c_b20, &a[(j + 1) * a_dim1 
 			+ 1], lda, &work[j + 1], &c__1, &c_b22, &a[j * a_dim1 
 			+ 1], &c__1);
 	    }
 /* L20: */
 	}
    } else {

 /*        Use blocked code. */

 /*        nn is the first column of the last block. The loop steps by */
 /*        -nb; the conditional in the loop test selects the comparison */
 /*        that matches the sign of the step. */
 	nn = (*n - 1) / nb * nb + 1;
 	i__1 = -nb;
 	for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) {
 /* Computing MIN */
 	    i__2 = nb, i__3 = *n - j + 1;
 	    jb = f2cmin(i__2,i__3);

 /*           Copy current block column of L to WORK and replace with */
 /*           zeros. */

 /*           WORK is addressed as a column-major array with leading */
 /*           dimension ldwork; block column jj - j holds column jj of L. */
 	    i__2 = j + jb - 1;
 	    for (jj = j; jj <= i__2; ++jj) {
 		i__3 = *n;
 		for (i__ = jj + 1; i__ <= i__3; ++i__) {
 		    work[i__ + (jj - j) * ldwork] = a[i__ + jj * a_dim1];
 		    a[i__ + jj * a_dim1] = 0.f;
 /* L30: */
 		}
 /* L40: */
 	    }

 /*           Compute current block column of inv(A). */

 /*           First update columns j..j+jb-1 with the columns to their */
 /*           right times the saved rows j+jb..n of WORK (skipped for the */
 /*           last block), then solve against the unit lower triangular */
 /*           block that starts at work[j]. */
 	    if (j + jb <= *n) {
 		i__2 = *n - j - jb + 1;
 		sgemm_("No transpose", "No transpose", n, &jb, &i__2, &c_b20, 
 			&a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], &
 			ldwork, &c_b22, &a[j * a_dim1 + 1], lda);
 	    }
 	    strsm_("Right", "Lower", "No transpose", "Unit", n, &jb, &c_b22, &
 		    work[j], &ldwork, &a[j * a_dim1 + 1], lda);
 /* L50: */
 	}
    }

 /*     Apply column interchanges. */

 /*     Columns are swapped from j = n-1 down to 1, exchanging column j */
 /*     with column ipiv[j] whenever the two differ. */
    for (j = *n - 1; j >= 1; --j) {
 	jp = ipiv[j];
 	if (jp != j) {
 	    sswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1);
 	}
 /* L60: */
    }

    work[1] = (real) iws;
    return 0;

 /*     End of SGETRI */

 } /* sgetri_ */

--- a/lapack-netlib/SRC/sgetrs.c
+++ b/lapack-netlib/SRC/sgetrs.c
@@ -0,0 +1,619 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar types used by the translated Fortran code. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex values are stored as a pair of members: r and i. The name */
 /* "complex" is free for this typedef because the <complex.h> macro of */
 /* that name is undefined just above. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd: build a C99 complex value from the r and i members of *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd: cast the struct pointer to a pointer to the C99 complex type */
 /* of the same precision. */
 /* NOTE(review): this relies on the struct and the _Complex type having */
 /* the same layout; confirm for the compilers in use. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf/pCd: dereference the cast pointer, so the result can be assigned */
 /* to, as in pCf(z) = value. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Truth-value and byte-sized types. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Generic helpers written as macros so they work for any arithmetic type. */
 /* Each argument may be evaluated more than once, so do not pass */
 /* expressions with side effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit helpers: b is the bit position; bit_clear and bit_set build the */
 /* mask from an unsigned 1. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shorthands; the trailing semicolon is part of the expansion. */
 #define myexit_() break;
 #define mycycle() continue;
 /* Value-producing helpers. They expand to parenthesized expressions so */
 /* that they can appear in assignments and larger expressions; the former */
 /* brace-wrapped expansions were only accepted as initializers. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 /* Position of the largest element; forwards to dmaxloc_ with the start */
 /* and end bounds dereferenced. */
 #define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Raise x to the integer power n by repeated squaring. */
 /* n == 0 yields 1; a negative n yields (1/x) raised to |n|. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative */
 		/* integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* Square only while exponent bits remain. */
 		x *= x;
 	}
 	return result;
 }
 /* Raise x to the integer power n by repeated squaring. */
 /* n == 0 yields 1; a negative n yields (1/x) raised to |n|. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative */
 		/* integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* Square only while exponent bits remain. */
 		x *= x;
 	}
 	return result;
 }
 /* Raise a single-precision complex base to a signed integer power using
    binary (square-and-multiply) exponentiation.
      x  base
      n  exponent; for n < 0 the base is inverted first, for n == 0 the
         result is 1
    Returns x**n. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int u;
 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take the magnitude in unsigned arithmetic: -n overflows
 		   (undefined behavior) for the most negative integer. */
 		u = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int) n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits left: square the base */
 		else break;
 	}
 	return result;
 }
 /* Raise a double-precision complex base to a signed integer power using
    binary (square-and-multiply) exponentiation.
      x  base
      n  exponent; for n < 0 the base is inverted first, for n == 0 the
         result is 1
    Returns x**n. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int u;
 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take the magnitude in unsigned arithmetic: -n overflows
 		   (undefined behavior) for the most negative integer. */
 		u = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int) n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits left: square the base */
 		else break;
 	}
 	return result;
 }
 /* Integer power x**n with truncating semantics for negative exponents.
      n == 0 or x == 1 : 1
      n <  0, x == -1  : +1 or -1 depending on the parity of n
      n <  0, x ==  0  : evaluates 1/x, i.e. an integer division by zero
                         (kept from the original; NOTE(review): this is
                         undefined behavior in C, presumably meant to trap)
      n <  0, otherwise: 0
      n >  0           : square-and-multiply product */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int u;
 	if (n == 0 || x == 1)
 		return 1;
 	if (n < 0) {
 		if (x != -1)
 			return x == 0 ? 1/x : 0;
 		/* Take the magnitude in unsigned arithmetic: -n overflows
 		   (undefined behavior) for the most negative integer. */
 		u = 0UL - (unsigned long int) n;
 	} else {
 		u = (unsigned long int) n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits left: square the base */
 		else break;
 	}
 	return result;
 }
 /* Locate the largest entry of w(s..e), with s and e 1-based and inclusive.
    Returns the 1-based position of that entry counted from s; ties keep the
    earliest entry because the comparison is strict.  w[s-1] is always read,
    even when e < s.  The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double best_val = w[s-1];
 	integer k = s + 1;
 	while (k <= e) {
 		if (w[k-1] > best_val) {
 			best = k;
 			best_val = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Locate the largest entry of w(s..e), with s and e 1-based and inclusive.
    Returns the 1-based position of that entry counted from s; ties keep the
    earliest entry because the comparison is strict.  w[s-1] is always read,
    even when e < s.  The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float best_val = w[s-1];
 	integer k = s + 1;
 	while (k <= e) {
 		if (w[k-1] > best_val) {
 			best = k;
 			best_val = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product of two single-precision complex vectors:
      *z = sum over k of conj(x_k) * y_k
      z            receives the result
      n_           number of elements; nothing is accumulated when *n_ <= 0
      x, incx_     first vector and its stride
      y, incy_     second vector and its stride
    A negative stride traverses the vector from its far end, as in the
    reference BLAS. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		/* With a negative stride the first element used sits at offset
 		   (1-n)*inc; starting at 0 would index before the array. */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product of two double-precision complex vectors:
      *z = sum over k of conj(x_k) * y_k
      z            receives the result
      n_           number of elements; nothing is accumulated when *n_ <= 0
      x, incx_     first vector and its stride
      y, incy_     second vector and its stride
    A negative stride traverses the vector from its far end, as in the
    reference BLAS. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		/* With a negative stride the first element used sits at offset
 		   (1-n)*inc; starting at 0 would index before the array. */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated dot product of two single-precision complex vectors:
      *z = sum over k of x_k * y_k
      z            receives the result
      n_           number of elements; nothing is accumulated when *n_ <= 0
      x, incx_     first vector and its stride
      y, incy_     second vector and its stride
    A negative stride traverses the vector from its far end, as in the
    reference BLAS. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotu = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotu = zdotu + x(i) * y(i) */
 			zdotu += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		/* With a negative stride the first element used sits at offset
 		   (1-n)*inc; starting at 0 would index before the array. */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotu += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotu;
 }
 /* Unconjugated dot product of two double-precision complex vectors:
      *z = sum over k of x_k * y_k
      z            receives the result
      n_           number of elements; nothing is accumulated when *n_ <= 0
      x, incx_     first vector and its stride
      y, incy_     second vector and its stride
    A negative stride traverses the vector from its far end, as in the
    reference BLAS. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotu = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotu = zdotu + x(i) * y(i) */
 			zdotu += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		/* With a negative stride the first element used sits at offset
 		   (1-n)*inc; starting at 0 would index before the array. */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotu += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotu;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */
 /* File-scope constants that sgetrs_ passes by address to the routines it
    calls:
      c__1   4th argument of slaswp_ in both branches, and its 7th argument
             in the no-transpose branch
      c_b12  7th argument of every strsm_ call (presumably the scaling
             factor alpha -- confirm against the strsm_ interface)
      c_n1   7th argument of slaswp_ in the transpose branch */

 static integer c__1 = 1;
 static real c_b12 = 1.f;
 static integer c_n1 = -1;

 /* > \brief \b SGETRS */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGETRS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgetrs.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgetrs.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgetrs.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */

 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, LDA, LDB, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       REAL               A( LDA, * ), B( LDB, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGETRS solves a system of linear equations */
 /* >    A * X = B  or  A**T * X = B */
 /* > with a general N-by-N matrix A using the LU factorization computed */
 /* > by SGETRF. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          Specifies the form of the system of equations: */
 /* >          = 'N':  A * X = B  (No transpose) */
 /* >          = 'T':  A**T* X = B  (Transpose) */
 /* >          = 'C':  A**T* X = B  (Conjugate transpose = Transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          The factors L and U from the factorization A = P*L*U */
 /* >          as computed by SGETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices from SGETRF; for 1<=i<=N, row i of the */
 /* >          matrix was interchanged with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,NRHS) */
 /* >          On entry, the right hand side matrix B. */
 /* >          On exit, the solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int sgetrs_(char *trans, integer *n, integer *nrhs, real *a, 
 	integer *lda, integer *ipiv, real *b, integer *ldb, integer *info)
 {
     /* External routines */
     extern logical lsame_(char *, char *);
     extern /* Subroutine */ int strsm_(char *, char *, char *, char *, 
 	    integer *, integer *, real *, real *, integer *, real *, integer *
 	    ), xerbla_(char *, integer *, ftnlen);
     extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer 
 	    *, integer *, integer *, integer *);

     /* Local variables */
     logical no_trans;
     integer min_ld;	/* smallest admissible leading dimension, max(1,N) */
     integer bad_arg;	/* 1-based position of the first illegal argument, or 0 */

 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */

 /*  ===================================================================== */

 /*     Test the input parameters; the first failing check determines INFO. */

     *info = 0;
     no_trans = lsame_(trans, "N");
     min_ld = f2cmax(1,*n);

     bad_arg = 0;
     if (! no_trans && ! lsame_(trans, "T") && ! lsame_(trans, "C")) {
 	bad_arg = 1;
     } else if (*n < 0) {
 	bad_arg = 2;
     } else if (*nrhs < 0) {
 	bad_arg = 3;
     } else if (*lda < min_ld) {
 	bad_arg = 5;
     } else if (*ldb < min_ld) {
 	bad_arg = 8;
     }

     if (bad_arg != 0) {
 	*info = -bad_arg;
 	xerbla_("SGETRS", &bad_arg, (ftnlen)6);
 	return 0;
     }

 /*     Quick return if possible */

     if (*n == 0 || *nrhs == 0) {
 	return 0;
     }

     if (! no_trans) {

 /*        Solve A**T * X = B: first U**T * X = B, then L**T * X = B, */
 /*        overwriting B with X each time, and finally apply the row */
 /*        interchanges to the solution vectors. */

 	strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b12, a,
 		lda, b, ldb);
 	strsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b12, a,
 		lda, b, ldb);
 	slaswp_(nrhs, b, ldb, &c__1, n, ipiv, &c_n1);
 	return 0;
     }

 /*     Solve A * X = B: apply the row interchanges to the right hand */
 /*     sides, then solve L*X = B followed by U*X = B, overwriting B */
 /*     with X each time. */

     slaswp_(nrhs, b, ldb, &c__1, n, ipiv, &c__1);
     strsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b12, a,
 	    lda, b, ldb);
     strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b12, a,
 	    lda, b, ldb);

     return 0;

 /*     End of SGETRS */

 } /* sgetrs_ */

--- a/lapack-netlib/SRC/sgetsls.c
+++ b/lapack-netlib/SRC/sgetsls.c
@@ -0,0 +1,929 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Fortran scalar types as C types. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Fortran COMPLEX / DOUBLE COMPLEX as (real, imaginary) pairs. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd: load an f2c complex struct as a native C complex value.
    The parts are stored individually (a C complex object is laid out as an
    array of two reals: real part first, imaginary part second) instead of
    evaluating r + i*_Complex_I, because that product turns the real part
    into NaN when the imaginary part is infinite. */
 static inline _Complex float Cf(complex *z) {
 	_Complex float v;
 	((real *)&v)[0] = z->r;
 	((real *)&v)[1] = z->i;
 	return v;
 }
 static inline _Complex double Cd(doublecomplex *z) {
 	_Complex double v;
 	((doublereal *)&v)[0] = z->r;
 	((doublereal *)&v)[1] = z->i;
 	return v;
 }
 /* _pCf / _pCd: view an f2c complex struct in place as a native C complex
    object; pCf(z) / pCd(z) are the corresponding lvalues, used to store a
    native complex result back into the struct. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Integer truth values (presumably the values stored in the `logical`
    typedef declared in this header). */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 /* Defaults to plain `extern` unless the including file defined it first. */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */
 /* Parameter records for the Fortran I/O statements.  Nothing in this file
    is seen to use them; the per-field meanings are not established here and
    are presumably those of the libf2c I/O runtime -- confirm before relying
    on them. */

 /* All three scalar I/O types are plain int. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Most string members are a char pointer followed by a length member.
    Note that informlen is declared ftnint while the other length members
    are ftnlen; both typedefs are int, so the layout is unaffected. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 /* Spelling of `void` used by f2c-generated declarations. */
 #define VOID void

 /* Overlay of the f2c scalar types: one member per type, all sharing the
    same storage. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of one variable: name, address, a pointer to dimension data
    and an integer type code (the encodings of dims and type are not defined
    in this file). */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Macro replacements for libf2c runtime helpers.  Pointer arguments follow
    the f2c calling convention (scalars are passed by address); arguments
    may be evaluated more than once. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* The exponent is a doublecomplex pointer just like the base (libf2c's
    pow_zz takes three doublecomplex pointers), so it has to be converted
    with Cd(); dereferencing it yields a struct that cpow() cannot accept. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 /* Value-producing macros are parenthesized expressions so they can be used
    in any expression context; the former brace-wrapped forms were only
    accepted as scalar initializers. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 /* L_fp is a pointer to a function returning `logical`.  The argument list is
    left unspecified: a variadic `(...)` signature when compiled as C++, an
    unprototyped `()` declarator when compiled as C. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Raise a single-precision base to a signed integer power using binary
    (square-and-multiply) exponentiation.
      x  base
      n  exponent; for n < 0 the base is inverted first, for n == 0 the
         result is 1
    Returns x**n. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int u;
 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take the magnitude in unsigned arithmetic: -n overflows
 		   (undefined behavior) for the most negative integer. */
 		u = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int) n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits left: square the base */
 		else break;
 	}
 	return result;
 }
 /* Raise a double-precision base to a signed integer power using binary
    (square-and-multiply) exponentiation.
      x  base
      n  exponent; for n < 0 the base is inverted first, for n == 0 the
         result is 1
    Returns x**n. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int u;
 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take the magnitude in unsigned arithmetic: -n overflows
 		   (undefined behavior) for the most negative integer. */
 		u = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int) n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits left: square the base */
 		else break;
 	}
 	return result;
 }
 /* Raise a single-precision complex base to a signed integer power using
    binary (square-and-multiply) exponentiation.
      x  base
      n  exponent; for n < 0 the base is inverted first, for n == 0 the
         result is 1
    Returns x**n. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int u;
 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take the magnitude in unsigned arithmetic: -n overflows
 		   (undefined behavior) for the most negative integer. */
 		u = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int) n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits left: square the base */
 		else break;
 	}
 	return result;
 }
 /* Raise a double-precision complex base to a signed integer power using
    binary (square-and-multiply) exponentiation.
      x  base
      n  exponent; for n < 0 the base is inverted first, for n == 0 the
         result is 1
    Returns x**n. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int u;
 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take the magnitude in unsigned arithmetic: -n overflows
 		   (undefined behavior) for the most negative integer. */
 		u = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int) n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits left: square the base */
 		else break;
 	}
 	return result;
 }
 /* Integer power x**n with truncating semantics for negative exponents.
      n == 0 or x == 1 : 1
      n <  0, x == -1  : +1 or -1 depending on the parity of n
      n <  0, x ==  0  : evaluates 1/x, i.e. an integer division by zero
                         (kept from the original; NOTE(review): this is
                         undefined behavior in C, presumably meant to trap)
      n <  0, otherwise: 0
      n >  0           : square-and-multiply product */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int u;
 	if (n == 0 || x == 1)
 		return 1;
 	if (n < 0) {
 		if (x != -1)
 			return x == 0 ? 1/x : 0;
 		/* Take the magnitude in unsigned arithmetic: -n overflows
 		   (undefined behavior) for the most negative integer. */
 		u = 0UL - (unsigned long int) n;
 	} else {
 		u = (unsigned long int) n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits left: square the base */
 		else break;
 	}
 	return result;
 }
 /* Locate the largest entry of w(s..e), with s and e 1-based and inclusive.
    Returns the 1-based position of that entry counted from s; ties keep the
    earliest entry because the comparison is strict.  w[s-1] is always read,
    even when e < s.  The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double best_val = w[s-1];
 	integer k = s + 1;
 	while (k <= e) {
 		if (w[k-1] > best_val) {
 			best = k;
 			best_val = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Locate the largest entry of w(s..e), with s and e 1-based and inclusive.
    Returns the 1-based position of that entry counted from s; ties keep the
    earliest entry because the comparison is strict.  w[s-1] is always read,
    even when e < s.  The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float best_val = w[s-1];
 	integer k = s + 1;
 	while (k <= e) {
 		if (w[k-1] > best_val) {
 			best = k;
 			best_val = w[k-1];
 		}
 		++k;
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product of two single-precision complex vectors:
      *z = sum over k of conj(x_k) * y_k
      z            receives the result
      n_           number of elements; nothing is accumulated when *n_ <= 0
      x, incx_     first vector and its stride
      y, incy_     second vector and its stride
    A negative stride traverses the vector from its far end, as in the
    reference BLAS. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		/* With a negative stride the first element used sits at offset
 		   (1-n)*inc; starting at 0 would index before the array. */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product of two double-precision complex vectors:
      *z = sum over k of conj(x_k) * y_k
      z            receives the result
      n_           number of elements; nothing is accumulated when *n_ <= 0
      x, incx_     first vector and its stride
      y, incy_     second vector and its stride
    A negative stride traverses the vector from its far end, as in the
    reference BLAS. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		/* With a negative stride the first element used sits at offset
 		   (1-n)*inc; starting at 0 would index before the array. */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated dot product of two single-precision complex vectors:
      *z = sum over k of x_k * y_k
      z            receives the result
      n_           number of elements; nothing is accumulated when *n_ <= 0
      x, incx_     first vector and its stride
      y, incy_     second vector and its stride
    A negative stride traverses the vector from its far end, as in the
    reference BLAS. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotu = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotu = zdotu + x(i) * y(i) */
 			zdotu += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		/* With a negative stride the first element used sits at offset
 		   (1-n)*inc; starting at 0 would index before the array. */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotu += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotu;
 }
 /* Unconjugated dot product of two double-precision complex vectors:
      *z = sum over k of x_k * y_k
      z            receives the result
      n_           number of elements; nothing is accumulated when *n_ <= 0
      x, incx_     first vector and its stride
      y, incy_     second vector and its stride
    A negative stride traverses the vector from its far end, as in the
    reference BLAS. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotu = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotu = zdotu + x(i) * y(i) */
 			zdotu += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		/* With a negative stride the first element used sits at offset
 		   (1-n)*inc; starting at 0 would index before the array. */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			zdotu += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotu;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */
 /* File-scope constants that sgetsls_ passes by address to the routines it
    calls:
      c_n1   both size arguments of the first sgeqr_/sgelq_ sizing call, and
             the work-size argument of the sgemqr_/sgemlq_ sizing calls
      c_n2   both size arguments of the second sgeqr_/sgelq_ sizing call
      c_b23  the two real arguments of slaset_ (used to zero B)
      c__0   2nd and 3rd arguments of slascl_ */

 static integer c_n1 = -1;
 static integer c_n2 = -2;
 static real c_b23 = 0.f;
 static integer c__0 = 0;

 /* > \brief \b SGETSLS */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, */
 /*     $                     WORK, LWORK, INFO ) */

 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS */
 /*       REAL               A( LDA, * ), B( LDB, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGETSLS solves overdetermined or underdetermined real linear systems */
 /* > involving an M-by-N matrix A, using a tall skinny QR or short wide LQ */
 /* > factorization of A.  It is assumed that A has full rank. */
 /* > */
 /* > */
 /* > */
 /* > The following options are provided: */
 /* > */
 /* > 1. If TRANS = 'N' and m >= n:  find the least squares solution of */
 /* >    an overdetermined system, i.e., solve the least squares problem */
 /* >                 minimize || B - A*X ||. */
 /* > */
 /* > 2. If TRANS = 'N' and m < n:  find the minimum norm solution of */
 /* >    an underdetermined system A * X = B. */
 /* > */
 /* > 3. If TRANS = 'T' and m >= n:  find the minimum norm solution of */
 /* >    an underdetermined system A**T * X = B. */
 /* > */
 /* > 4. If TRANS = 'T' and m < n:  find the least squares solution of */
 /* >    an overdetermined system, i.e., solve the least squares problem */
 /* >                 minimize || B - A**T * X ||. */
 /* > */
 /* > Several right hand side vectors b and solution vectors x can be */
 /* > handled in a single call; they are stored as the columns of the */
 /* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
 /* > matrix X. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          = 'N': the linear system involves A; */
 /* >          = 'T': the linear system involves A**T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of */
 /* >          columns of the matrices B and X. NRHS >=0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, */
 /* >          A is overwritten by details of its QR or LQ */
 /* >          factorization as returned by SGEQR or SGELQ. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,NRHS) */
 /* >          On entry, the matrix B of right hand side vectors, stored */
 /* >          columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */
 /* >          if TRANS = 'T'. */
 /* >          On exit, if INFO = 0, B is overwritten by the solution */
 /* >          vectors, stored columnwise: */
 /* >          if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */
 /* >          squares solution vectors. */
 /* >          if TRANS = 'N' and m < n, rows 1 to N of B contain the */
 /* >          minimum norm solution vectors; */
 /* >          if TRANS = 'T' and m >= n, rows 1 to M of B contain the */
 /* >          minimum norm solution vectors; */
 /* >          if TRANS = 'T' and m < n, rows 1 to M of B contain the */
 /* >          least squares solution vectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= MAX(1,M,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          (workspace) REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) contains the optimal LWORK (or, */
 /* >          if a workspace query was assumed, the minimal or optimal LWORK). */
 /* >          See LWORK for details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* >          If LWORK = -1 or -2, then a workspace query is assumed. */
 /* >          If LWORK = -1, the routine calculates optimal size of WORK for the */
 /* >          optimal performance and returns this value in WORK(1). */
 /* >          If LWORK = -2, the routine calculates minimal size of WORK and */
 /* >          returns this value in WORK(1). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO =  i, the i-th diagonal element of the */
 /* >                triangular factor of A is zero, so that A does not have */
 /* >                full rank; the least squares solution could not be */
 /* >                computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2017 */

 /* > \ingroup realGEsolve */

 /*  ===================================================================== */
 /* Subroutine */ int sgetsls_(char *trans, integer *m, integer *n, integer *
 	nrhs, real *a, integer *lda, real *b, integer *ldb, real *work, 
 	integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;

    /* Local variables */
    real anrm, bnrm;
    logical tran;
    integer brow, tszm, tszo, info2, i__, j, iascl, ibscl;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sgelq_(integer *, integer *, real *, integer *
 	    , real *, integer *, real *, integer *, integer *);
    integer minmn, maxmn;
    extern /* Subroutine */ int sgeqr_(integer *, integer *, real *, integer *
 	    , real *, integer *, real *, integer *, integer *);
    real workq[1];
    extern /* Subroutine */ int slabad_(real *, real *);
    real tq[5];
    extern real slamch_(char *), slange_(char *, integer *, integer *,
 	     real *, integer *, real *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    integer scllen;
    real bignum;
    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
 	    real *, integer *, integer *, real *, integer *, integer *), sgemlq_(char *, char *, integer *, integer *, integer *, 
 	    real *, integer *, real *, integer *, real *, integer *, real *, 
 	    integer *, integer *), slaset_(char *, integer *, 
 	    integer *, real *, real *, real *, integer *), sgemqr_(
 	    char *, char *, integer *, integer *, integer *, real *, integer *
 	    , real *, integer *, real *, integer *, real *, integer *, 
 	    integer *);
    real smlnum;
    integer wsizem, wsizeo;
    logical lquery;
    integer lw1, lw2;
    extern /* Subroutine */ int strtrs_(char *, char *, char *, integer *, 
 	    integer *, real *, integer *, real *, integer *, integer *);
    integer mnk, lwm, lwo;


 /*  -- LAPACK driver routine (version 3.7.1) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2017 */



 /*  ===================================================================== */


 /*     Test the input arguments. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;

    /* Function Body */
    *info = 0;
    minmn = f2cmin(*m,*n);
    maxmn = f2cmax(*m,*n);
    mnk = f2cmax(minmn,*nrhs);
    tran = lsame_(trans, "T");

    lquery = *lwork == -1 || *lwork == -2;
    if (! (lsame_(trans, "N") || lsame_(trans, "T"))) {
 	*info = -1;
    } else if (*m < 0) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*nrhs < 0) {
 	*info = -4;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -6;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = f2cmax(1,*m);
 	if (*ldb < f2cmax(i__1,*n)) {
 	    *info = -8;
 	}
    }

    if (*info == 0) {

 /*     Determine the block size and minimum LWORK */

 	if (*m >= *n) {
 	    sgeqr_(m, n, &a[a_offset], lda, tq, &c_n1, workq, &c_n1, &info2);
 	    tszo = (integer) tq[0];
 	    lwo = (integer) workq[0];
 	    sgemqr_("L", trans, m, nrhs, n, &a[a_offset], lda, tq, &tszo, &b[
 		    b_offset], ldb, workq, &c_n1, &info2);
 /* Computing MAX */
 	    i__1 = lwo, i__2 = (integer) workq[0];
 	    lwo = f2cmax(i__1,i__2);
 	    sgeqr_(m, n, &a[a_offset], lda, tq, &c_n2, workq, &c_n2, &info2);
 	    tszm = (integer) tq[0];
 	    lwm = (integer) workq[0];
 	    sgemqr_("L", trans, m, nrhs, n, &a[a_offset], lda, tq, &tszm, &b[
 		    b_offset], ldb, workq, &c_n1, &info2);
 /* Computing MAX */
 	    i__1 = lwm, i__2 = (integer) workq[0];
 	    lwm = f2cmax(i__1,i__2);
 	    wsizeo = tszo + lwo;
 	    wsizem = tszm + lwm;
 	} else {
 	    sgelq_(m, n, &a[a_offset], lda, tq, &c_n1, workq, &c_n1, &info2);
 	    tszo = (integer) tq[0];
 	    lwo = (integer) workq[0];
 	    sgemlq_("L", trans, n, nrhs, m, &a[a_offset], lda, tq, &tszo, &b[
 		    b_offset], ldb, workq, &c_n1, &info2);
 /* Computing MAX */
 	    i__1 = lwo, i__2 = (integer) workq[0];
 	    lwo = f2cmax(i__1,i__2);
 	    sgelq_(m, n, &a[a_offset], lda, tq, &c_n2, workq, &c_n2, &info2);
 	    tszm = (integer) tq[0];
 	    lwm = (integer) workq[0];
 	    sgemlq_("L", trans, n, nrhs, m, &a[a_offset], lda, tq, &tszm, &b[
 		    b_offset], ldb, workq, &c_n1, &info2);
 /* Computing MAX */
 	    i__1 = lwm, i__2 = (integer) workq[0];
 	    lwm = f2cmax(i__1,i__2);
 	    wsizeo = tszo + lwo;
 	    wsizem = tszm + lwm;
 	}

 	if (*lwork < wsizem && ! lquery) {
 	    *info = -10;
 	}

    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("SGETSLS", &i__1, (ftnlen)7);
 	work[1] = (real) wsizeo;
 	return 0;
    }
    if (lquery) {
 	if (*lwork == -1) {
 	    work[1] = (real) wsizeo;
 	}
 	if (*lwork == -2) {
 	    work[1] = (real) wsizem;
 	}
 	return 0;
    }
    if (*lwork < wsizeo) {
 	lw1 = tszm;
 	lw2 = lwm;
    } else {
 	lw1 = tszo;
 	lw2 = lwo;
    }

 /*     Quick return if possible */

 /* Computing MIN */
    i__1 = f2cmin(*m,*n);
    if (f2cmin(i__1,*nrhs) == 0) {
 	i__1 = f2cmax(*m,*n);
 	slaset_("FULL", &i__1, nrhs, &c_b23, &c_b23, &b[b_offset], ldb);
 	return 0;
    }

 /*     Get machine parameters */

    smlnum = slamch_("S") / slamch_("P");
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);

 /*     Scale A, B if max element outside range [SMLNUM,BIGNUM] */

    anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]);
    iascl = 0;
    if (anrm > 0.f && anrm < smlnum) {

 /*        Scale matrix norm up to SMLNUM */

 	slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
 		info);
 	iascl = 1;
    } else if (anrm > bignum) {

 /*        Scale matrix norm down to BIGNUM */

 	slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
 		info);
 	iascl = 2;
    } else if (anrm == 0.f) {

 /*        Matrix all zero. Return zero solution. */

 	slaset_("F", &maxmn, nrhs, &c_b23, &c_b23, &b[b_offset], ldb);
 	goto L50;
    }

    brow = *m;
    if (tran) {
 	brow = *n;
    }
    bnrm = slange_("M", &brow, nrhs, &b[b_offset], ldb, &work[1]);
    ibscl = 0;
    if (bnrm > 0.f && bnrm < smlnum) {

 /*        Scale matrix norm up to SMLNUM */

 	slascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], 
 		ldb, info);
 	ibscl = 1;
    } else if (bnrm > bignum) {

 /*        Scale matrix norm down to BIGNUM */

 	slascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], 
 		ldb, info);
 	ibscl = 2;
    }

    if (*m >= *n) {

 /*        compute QR factorization of A */

 	sgeqr_(m, n, &a[a_offset], lda, &work[lw2 + 1], &lw1, &work[1], &lw2, 
 		info);
 	if (! tran) {

 /*           Least-Squares Problem min || A * X - B || */

 /*           B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */

 	    sgemqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[lw2 + 1], &
 		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);

 /*           B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */

 	    strtrs_("U", "N", "N", n, nrhs, &a[a_offset], lda, &b[b_offset], 
 		    ldb, info);
 	    if (*info > 0) {
 		return 0;
 	    }
 	    scllen = *n;
 	} else {

 /*           Overdetermined system of equations A**T * X = B */

 /*           B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */

 	    strtrs_("U", "T", "N", n, nrhs, &a[a_offset], lda, &b[b_offset], 
 		    ldb, info);

 	    if (*info > 0) {
 		return 0;
 	    }

 /*           B(N+1:M,1:NRHS) = ZERO */

 	    i__1 = *nrhs;
 	    for (j = 1; j <= i__1; ++j) {
 		i__2 = *m;
 		for (i__ = *n + 1; i__ <= i__2; ++i__) {
 		    b[i__ + j * b_dim1] = 0.f;
 /* L10: */
 		}
 /* L20: */
 	    }

 /*           B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */

 	    sgemqr_("L", "N", m, nrhs, n, &a[a_offset], lda, &work[lw2 + 1], &
 		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);

 	    scllen = *m;

 	}

    } else {

 /*        Compute LQ factorization of A */

 	sgelq_(m, n, &a[a_offset], lda, &work[lw2 + 1], &lw1, &work[1], &lw2, 
 		info);

 /*        workspace at least M, optimally M*NB. */

 	if (! tran) {

 /*           underdetermined system of equations A * X = B */

 /*           B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */

 	    strtrs_("L", "N", "N", m, nrhs, &a[a_offset], lda, &b[b_offset], 
 		    ldb, info);

 	    if (*info > 0) {
 		return 0;
 	    }

 /*           B(M+1:N,1:NRHS) = 0 */

 	    i__1 = *nrhs;
 	    for (j = 1; j <= i__1; ++j) {
 		i__2 = *n;
 		for (i__ = *m + 1; i__ <= i__2; ++i__) {
 		    b[i__ + j * b_dim1] = 0.f;
 /* L30: */
 		}
 /* L40: */
 	    }

 /*           B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) */

 	    sgemlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[lw2 + 1], &
 		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);

 /*           workspace at least NRHS, optimally NRHS*NB */

 	    scllen = *n;

 	} else {

 /*           overdetermined system min || A**T * X - B || */

 /*           B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */

 	    sgemlq_("L", "N", n, nrhs, m, &a[a_offset], lda, &work[lw2 + 1], &
 		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);

 /*           workspace at least NRHS, optimally NRHS*NB */

 /*           B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */

 	    strtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], 
 		    lda, &b[b_offset], ldb, info);

 	    if (*info > 0) {
 		return 0;
 	    }

 	    scllen = *m;

 	}

    }

 /*     Undo scaling */

    if (iascl == 1) {
 	slascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
    } else if (iascl == 2) {
 	slascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
    }
    if (ibscl == 1) {
 	slascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
    } else if (ibscl == 2) {
 	slascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset]
 		, ldb, info);
    }

 L50:
    work[1] = (real) (tszo + lwo);
    return 0;

 /*     End of SGETSLS */

 } /* sgetsls_ */

--- a/lapack-netlib/SRC/sgetsqrhrt.c
+++ b/lapack-netlib/SRC/sgetsqrhrt.c
@@ -0,0 +1,765 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Build a C99 _Complex float value from the r (real) and i (imaginary) fields of an f2c complex struct. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 /* Double-precision counterpart of Cf: builds a _Complex double from a doublecomplex struct. */
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* Reinterpret a pointer to an f2c complex struct as a pointer to _Complex float (plain pointer cast). */
 /* NOTE(review): this presumes the struct and _Complex float share the same layout - confirm for the target compiler. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 /* Double-precision counterpart of _pCf: casts a doublecomplex pointer to a _Complex double pointer. */
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Raise the float x to the integer power n by repeated squaring. */
 /* n == 0 yields 1; a negative n yields (1/x) raised to |n|.      */
 static float spow_ui(float x, integer n) {
 	float pow=1.0;
 	/* |n| is formed in unsigned arithmetic below: negating n itself overflows */
 	/* (undefined behaviour) when n is the most negative integer.              */
 	unsigned long int u = (unsigned long int) n;
 	if(n != 0) {
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* low bit set: fold the current square into the result */
 			if(u >>= 1) x *= x;	/* more bits left: square the base for the next bit */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Raise the double x to the integer power n by repeated squaring. */
 /* n == 0 yields 1; a negative n yields (1/x) raised to |n|.       */
 static double dpow_ui(double x, integer n) {
 	double pow=1.0;
 	/* |n| is formed in unsigned arithmetic below: negating n itself overflows */
 	/* (undefined behaviour) when n is the most negative integer.              */
 	unsigned long int u = (unsigned long int) n;
 	if(n != 0) {
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* low bit set: fold the current square into the result */
 			if(u >>= 1) x *= x;	/* more bits left: square the base for the next bit */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Raise the single-precision complex x to the integer power n by repeated squaring. */
 /* n == 0 yields 1; a negative n yields (1/x) raised to |n|.                         */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float pow=1.0;
 	/* |n| is formed in unsigned arithmetic below: negating n itself overflows */
 	/* (undefined behaviour) when n is the most negative integer.              */
 	unsigned long int u = (unsigned long int) n;
 	if(n != 0) {
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* low bit set: fold the current square into the result */
 			if(u >>= 1) x *= x;	/* more bits left: square the base for the next bit */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Raise the double-precision complex x to the integer power n by repeated squaring. */
 /* n == 0 yields 1; a negative n yields (1/x) raised to |n|.                         */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double pow=1.0;
 	/* |n| is formed in unsigned arithmetic below: negating n itself overflows */
 	/* (undefined behaviour) when n is the most negative integer.              */
 	unsigned long int u = (unsigned long int) n;
 	if(n != 0) {
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) pow *= x;	/* low bit set: fold the current square into the result */
 			if(u >>= 1) x *= x;	/* more bits left: square the base for the next bit */
 			else break;
 		}
 	}
 	return pow;
 }
 /* Integer power x**n with an integer result.                              */
 /*   n == 0 or x == 1      : 1                                             */
 /*   n <  0 and |x| > 1    : 0 (the true value is a fraction below one)    */
 /*   n <  0 and x == -1    : +1 or -1, computed as (-1)**|n|               */
 /*   n >  0                : repeated squaring                             */
 static integer pow_ii(integer x, integer n) {
 	integer pow = 1; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		/* x == 0 with a negative exponent has no integer value; the division */
 		/* by zero is kept exactly as in the original code.                   */
 		/* NOTE(review): presumably meant to trap - confirm before changing.  */
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* x == -1: take |n| in unsigned arithmetic, since -n overflows when  */
 		/* n is the most negative integer.                                    */
 		u = 0UL - (unsigned long int) n;
 	} else {
 		u = (unsigned long int) n;
 	}
 	for( ; ; ) {
 		if(u & 01) pow *= x;	/* low bit set: fold the current square into the result */
 		if(u >>= 1) x *= x;	/* more bits left: square the base for the next bit */
 		else break;
 	}
 	return pow;
 }
 /* Scan the 1-based positions s..e of w and return the position of the first */
 /* largest element, counted from s (so s itself maps to 1).  The argument n  */
 /* is not used.                                                              */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; k++) {
 		/* strict comparison keeps the earliest position on ties */
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Scan the 1-based positions s..e of w and return the position of the first */
 /* largest element, counted from s (so s itself maps to 1).  The argument n  */
 /* is not used.                                                              */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; k++) {
 		/* strict comparison keeps the earliest position on ties */
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: stores sum over i of conj(x(i)) * y(i) into *z.   */
 /*   n_            number of elements to combine (nothing is read if <= 0)   */
 /*   x, incx_      first vector and its stride                               */
 /*   y, incy_      second vector and its stride                              */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		/* BLAS convention: a negative stride walks the vector backwards, */
 		/* starting at element (1-n)*inc, so every index stays >= 0.      */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++, ix += incx, iy += incy) {
 			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product (double precision): stores sum over i of           */
 /* conj(x(i)) * y(i) into *z.                                                */
 /*   n_            number of elements to combine (nothing is read if <= 0)   */
 /*   x, incx_      first vector and its stride                               */
 /*   y, incy_      second vector and its stride                              */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		/* BLAS convention: a negative stride walks the vector backwards, */
 		/* starting at element (1-n)*inc, so every index stays >= 0.      */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++, ix += incx, iy += incy) {
 			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated dot product: stores sum over i of x(i) * y(i) into *z.       */
 /*   n_            number of elements to combine (nothing is read if <= 0)   */
 /*   x, incx_      first vector and its stride                               */
 /*   y, incy_      second vector and its stride                              */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		/* BLAS convention: a negative stride walks the vector backwards, */
 		/* starting at element (1-n)*inc, so every index stays >= 0.      */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++, ix += incx, iy += incy) {
 			zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Unconjugated dot product (double precision): stores sum over i of         */
 /* x(i) * y(i) into *z.                                                      */
 /*   n_            number of elements to combine (nothing is read if <= 0)   */
 /*   x, incx_      first vector and its stride                               */
 /*   y, incy_      second vector and its stride                              */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		/* BLAS convention: a negative stride walks the vector backwards, */
 		/* starting at element (1-n)*inc, so every index stays >= 0.      */
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++, ix += incx, iy += incy) {
 			zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b SGETSQRHRT */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGETSQRHRT + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgetsqrhrt.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgetsqrhrt.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgetsqrhrt.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK, */
 /*      $                       LWORK, INFO ) */
 /*       IMPLICIT NONE */

 /*       INTEGER           INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1 */
 /*       REAL              A( LDA, * ), T( LDT, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGETSQRHRT computes a NB2-sized column blocked QR-factorization */
 /* > of a real M-by-N matrix A with M >= N, */
 /* > */
 /* >    A = Q * R. */
 /* > */
 /* > The routine uses internally a NB1-sized column blocked and MB1-sized */
 /* > row blocked TSQR-factorization and performs the reconstruction */
 /* > of the Householder vectors from the TSQR output. The routine also */
 /* > converts the R_tsqr factor from the TSQR-factorization output into */
 /* > the R factor that corresponds to the Householder QR-factorization, */
 /* > */
 /* >    A = Q_tsqr * R_tsqr = Q * R. */
 /* > */
 /* > The output Q and R factors are stored in the same format as in SGEQRT */
 /* > (Q is in blocked compact WY-representation). See the documentation */
 /* > of SGEQRT for more details on the format. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrix A. M >= N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] MB1 */
 /* > \verbatim */
 /* >          MB1 is INTEGER */
 /* >          The row block size to be used in the blocked TSQR. */
 /* >          MB1 > N. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NB1 */
 /* > \verbatim */
 /* >          NB1 is INTEGER */
 /* >          The column block size to be used in the blocked TSQR. */
 /* >          N >= NB1 >= 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NB2 */
 /* > \verbatim */
 /* >          NB2 is INTEGER */
 /* >          The block size to be used in the blocked QR that is */
 /* >          output. NB2 >= 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* > */
 /* >          On entry: an M-by-N matrix A. */
 /* > */
 /* >          On exit: */
 /* >           a) the elements on and above the diagonal */
 /* >              of the array contain the N-by-N upper-triangular */
 /* >              matrix R corresponding to the Householder QR; */
 /* >           b) the elements below the diagonal represent Q by */
 /* >              the columns of blocked V (compact WY-representation). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= MAX(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] T */
 /* > \verbatim */
 /* >          T is REAL array, dimension (LDT,N) */
 /* >          The upper triangular block reflectors stored in compact form */
 /* >          as a sequence of upper triangular blocks. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDT */
 /* > \verbatim */
 /* >          LDT is INTEGER */
 /* >          The leading dimension of the array T.  LDT >= NB2. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          (workspace) REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          The dimension of the array WORK. */
 /* >          LWORK >= MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ), */
 /* >          where */
 /* >             NUM_ALL_ROW_BLOCKS = CEIL((M-N)/(MB1-N)), */
 /* >             NB1LOCAL = MIN(NB1,N). */
 /* >             LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL, */
 /* >             LW1 = NB1LOCAL * N, */
 /* >             LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ), */
 /* >          If LWORK = -1, then a workspace query is assumed. */
 /* >          The routine only calculates the optimal size of the WORK */
 /* >          array, returns this value as the first entry of the WORK */
 /* >          array, and no error message related to LWORK is issued */
 /* >          by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \ingroup singleOTHERcomputational */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* > November 2020, Igor Kozachenko, */
 /* >                Computer Science Division, */
 /* >                University of California, Berkeley */
 /* > */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* SGETSQRHRT: QR-factorize the M-by-N matrix A (M >= N) in six steps:      */
 /*   (1) TSQR factorization via SLATSQR, with its T stored in WORK;         */
 /*   (2) copy of R_tsqr from the upper triangle of A into WORK;             */
 /*   (3) generation of Q in place via SORGTSQR_ROW;                         */
 /*   (4) Householder reconstruction via SORHR_COL (fills T, sign vector);   */
 /*   (5)+(6) copy of R_tsqr back into A with the row signs applied.         */
 /* On a clean exit or a workspace query (LWORK = -1) WORK(1) holds the      */
 /* computed workspace size.  INFO = -i flags an illegal i-th argument,      */
 /* which is also reported through XERBLA.  The function always returns 0.   */
 /* Subroutine */ int sgetsqrhrt_(integer *m, integer *n, integer *mb1, 
 	integer *nb1, integer *nb2, real *a, integer *lda, real *t, integer *
 	ldt, real *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3, i__4;
    real r__1, r__2, r__3;

    /* Local variables */
    integer ldwt, lworkopt, i__, j, iinfo;
    extern /* Subroutine */ int sorgtsqr_row_(integer *, integer *, integer *
 	    , integer *, real *, integer *, real *, integer *, real *, 
 	    integer *, integer *), scopy_(integer *, real *, integer *, real *
 	    , integer *), sorhr_col_(integer *, integer *, integer *, real *,
 	     integer *, real *, integer *, real *, integer *), xerbla_(char *,
 	     integer *, ftnlen);
    logical lquery;
    integer lw1, lw2, num_all_row_blocks__, lwt;
    extern /* Subroutine */ int slatsqr_(integer *, integer *, integer *, 
 	    integer *, real *, integer *, real *, integer *, real *, integer *
 	    , integer *);
    integer nb1local, nb2local;


 /*  -- LAPACK computational routine -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */


 /*  ===================================================================== */


 /*     Test the input arguments */

    /* Parameter adjustments (shift the base pointers so that the */
    /* 1-based Fortran indices used below address the right element) */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    --work;

    /* Function Body */
    *info = 0;
    lquery = *lwork == -1;
    if (*m < 0) {
 	*info = -1;
    } else if (*n < 0 || *m < *n) {
 	*info = -2;
    } else if (*mb1 <= *n) {
 	*info = -3;
    } else if (*nb1 < 1) {
 	*info = -4;
    } else if (*nb2 < 1) {
 	*info = -5;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -7;
    } else /* if(complicated condition) */ {
 /* Computing MAX */
 	i__1 = 1, i__2 = f2cmin(*nb2,*n);
 	if (*ldt < f2cmax(i__1,i__2)) {
 	    *info = -9;
 	} else {

 /*        Test the input LWORK for the dimension of the array WORK. */
 /*        This workspace is used to store array: */
 /*        a) Matrix T and WORK for SLATSQR; */
 /*        b) N-by-N upper-triangular factor R_tsqr; */
 /*        c) Matrix T and array WORK for SORGTSQR_ROW; */
 /*        d) Diagonal D for SORHR_COL. */

 	    if (*lwork < *n * *n + 1 && ! lquery) {
 		*info = -11;
 	    } else {

 /*           Set block size for column blocks */

 		nb1local = f2cmin(*nb1,*n);

 /*           Number of row blocks of the TSQR factorization: */
 /*           MAX( 1, CEIL( (M-N) / (MB1-N) ) ). */
 /*           The quotient has to be rounded UP.  Rounding to nearest */
 /*           undercounts the blocks whenever the fractional part is */
 /*           below one half; LWT then becomes too small and the T */
 /*           region handed to SLATSQR overlaps the region at */
 /*           WORK(LWT+1).  MB1 > N was checked above, so the divisor */
 /*           is positive. */

 /* Computing MAX */
 		r__3 = (real) (*m - *n) / (real) (*mb1 - *n);
 		r__1 = 1.f, r__2 = ceilf(r__3);
 		num_all_row_blocks__ = (integer) f2cmax(r__1,r__2);

 /*           Length and leading dimension of WORK array to place */
 /*           T array in TSQR. */

 		lwt = num_all_row_blocks__ * *n * nb1local;
 		ldwt = nb1local;

 /*           Length of TSQR work array */

 		lw1 = nb1local * *n;

 /*           Length of SORGTSQR_ROW work array. */

 /* Computing MAX */
 		i__1 = nb1local, i__2 = *n - nb1local;
 		lw2 = nb1local * f2cmax(i__1,i__2);

 /*           LWORKOPT = MAX( LWT+LW1, LWT+N*N+LW2, LWT+N*N+N ) */

 /* Computing MAX */
 /* Computing MAX */
 		i__3 = lwt + *n * *n + lw2, i__4 = lwt + *n * *n + *n;
 		i__1 = lwt + lw1, i__2 = f2cmax(i__3,i__4);
 		lworkopt = f2cmax(i__1,i__2);

 		if (*lwork < f2cmax(1,lworkopt) && ! lquery) {
 		    *info = -11;
 		}

 	    }
 	}
    }

 /*     Handle error in the input parameters and return workspace query. */

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("SGETSQRHRT", &i__1, (ftnlen)10);
 	return 0;
    } else if (lquery) {
 	work[1] = (real) lworkopt;
 	return 0;
    }

 /*     Quick return if possible */

    if (f2cmin(*m,*n) == 0) {
 	work[1] = (real) lworkopt;
 	return 0;
    }

    nb2local = f2cmin(*nb2,*n);


 /*     (1) Perform TSQR-factorization of the M-by-N matrix A. */
 /*     T goes to WORK(1:LWT), the TSQR scratch space to WORK(LWT+1:). */

    slatsqr_(m, n, mb1, &nb1local, &a[a_offset], lda, &work[1], &ldwt, &work[
 	    lwt + 1], &lw1, &iinfo);

 /*     (2) Copy the factor R_tsqr stored in the upper-triangular part */
 /*         of A into the square matrix in the work array */
 /*         WORK(LWT+1:LWT+N*N) column-by-column. */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
 	scopy_(&j, &a[j * a_dim1 + 1], &c__1, &work[lwt + *n * (j - 1) + 1], &
 		c__1);
    }

 /*     (3) Generate a M-by-N matrix Q with orthonormal columns from */
 /*     the result stored below the diagonal in the array A in place. */

    sorgtsqr_row_(m, n, mb1, &nb1local, &a[a_offset], lda, &work[1], &ldwt, &
 	    work[lwt + *n * *n + 1], &lw2, &iinfo);

 /*     (4) Perform the reconstruction of Householder vectors from */
 /*     the matrix Q (stored in A) in place. */

    sorhr_col_(m, n, &nb2local, &a[a_offset], lda, &t[t_offset], ldt, &work[
 	    lwt + *n * *n + 1], &iinfo);

 /*     (5) Copy the factor R_tsqr stored in the square matrix in the */
 /*     work array WORK(LWT+1:LWT+N*N) into the upper-triangular */
 /*     part of A. */

 /*     (6) Compute from R_tsqr the factor R_hr corresponding to */
 /*     the reconstructed Householder vectors, i.e. R_hr = S * R_tsqr. */
 /*     This multiplication by the sign matrix S on the left means */
 /*     changing the sign of I-th row of the matrix R_tsqr according */
 /*     to sign of the I-th diagonal element DIAG(I) of the matrix S. */
 /*     DIAG is stored in WORK( LWT+N*N+1 ) from the SORHR_COL output. */

 /*     (5) and (6) can be combined in a single loop, so the rows in A */
 /*     are accessed only once. */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	if (work[lwt + *n * *n + i__] == -1.f) {
 /*           DIAG(I) = -1: copy row I of R_tsqr with its sign flipped */
 	    i__2 = *n;
 	    for (j = i__; j <= i__2; ++j) {
 		a[i__ + j * a_dim1] = work[lwt + *n * (j - 1) + i__] * -1.f;
 	    }
 	} else {
 /*           otherwise copy row I (columns I..N) of R_tsqr unchanged */
 	    i__2 = *n - i__ + 1;
 	    scopy_(&i__2, &work[lwt + *n * (i__ - 1) + i__], n, &a[i__ + i__ *
 		     a_dim1], lda);
 	}
    }

    work[1] = (real) lworkopt;
    return 0;

 /*     End of SGETSQRHRT */

 } /* sgetsqrhrt_ */

--- a/lapack-netlib/SRC/sggbak.c
+++ b/lapack-netlib/SRC/sggbak.c
@@ -0,0 +1,720 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Fortran scalar types as used by the translated code below. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers are plain (real, imaginary) pairs. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd build a C99 complex VALUE from the pair pointed to by z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd reinterpret the pair's address as a pointer to a C99 complex
    object.  NOTE(review): this assumes the struct has the same layout as the
    corresponding _Complex type (no padding between r and i) -- confirm. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf(z)/pCd(z) are lvalues: assigning to them stores into *z. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Fortran LOGICAL and small-integer types. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Values of the logical constants. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 /* Defaults to plain "extern" unless the includer defined it beforehand. */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */
 /* Parameter blocks, one struct per kind of Fortran I/O statement (see the
    comment above each).  NOTE(review): nothing in the code visible around
    this header uses them; they appear to be kept only so that the header
    stays a complete f2c.h -- confirm before removing anything. */

 typedef int flag;	/* integer flag type used by the structs below */
 typedef int ftnlen;	/* string length; also the type of the hidden length
 			   argument in calls such as xerbla_(..., (ftnlen)6) */
 typedef int ftnint;	/* general integer type used by the structs below */

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Character-valued members are each paired with an ftnlen/ftnint length
    member; the remaining members are pointers to integers. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 /* Spelling of "void" used by f2c-generated code. */
 #define VOID void

 /* One member per scalar or complex type defined above, overlaid in the same
    storage. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Describes one variable: its name, its address, a pointer to dimension
    data and an integer type code.  NOTE(review): the encoding of dims and
    type is not visible in this file -- confirm against the f2c runtime. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Generic arithmetic helpers.  These are function-like macros and may
    evaluate an argument more than once, so arguments must be free of side
    effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Test, clear or set bit number b of a. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro versions of the f2c runtime helpers.  Unless noted otherwise the
    operands are POINTERS to the values.  For the c_, z_ and pow_?i/pow_zz
    families that expand to a braced statement, the first argument points to
    the complex result. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* Truncation toward zero. */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* Base-10 logarithm; the constant is log10(e). */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* u_nint and u_sign take VALUES: nearest integer with halves rounded away
    from zero, and |a| carrying the sign of b. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Powers.  The *_ui workers are the static functions defined further down
    in this header. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* Both operands are doublecomplex pointers, so both go through Cd();
    handing the bare struct *(B) to cpow() does not compile. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 /* Concatenate *np strings rpp[i] (lengths rnp[i]) into lpp, which has room
    for llp characters; input is truncated to fit and the remainder of lpp is
    filled with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* Compare only the first min(c,d) characters. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* Copy at most min(C,D) characters, stopping early at a NUL in B; the
    destination is neither blank-padded nor NUL-terminated. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* Both arguments are ignored: sig_die exits with status 1, s_stop with 0. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shorthands; usable only inside a loop body. */
 #define myexit_() break;
 #define mycycle() continue;
 /* These three yield a value, so they expand to parenthesised expressions;
    a braced body such as {ceil(w)} cannot appear inside an expression. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning logical.  The argument list is left
    unspecified ("..." in C++, empty parentheses in C), so the compiler does
    not check the arguments of calls made through it. */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Single-precision x**n for an integer exponent, by binary exponentiation.
    n == 0 gives 1 for every x; a negative n is evaluated as (1/x)**|n|. */
 static float spow_ui(float x, integer n) {
 	float result=1.0; unsigned long int u;
 	if(n != 0) {
 		/* Form |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		u = (unsigned long int)n;
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits left: square the base */
 			else break;
 		}
 	}
 	return result;
 }
 /* Double-precision x**n for an integer exponent, by binary exponentiation.
    n == 0 gives 1 for every x; a negative n is evaluated as (1/x)**|n|. */
 static double dpow_ui(double x, integer n) {
 	double result=1.0; unsigned long int u;
 	if(n != 0) {
 		/* Form |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		u = (unsigned long int)n;
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits left: square the base */
 			else break;
 		}
 	}
 	return result;
 }
 /* Single-precision complex x**n for an integer exponent, by binary
    exponentiation.  n == 0 gives 1; a negative n is evaluated as (1/x)**|n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result=1.0; unsigned long int u;
 	if(n != 0) {
 		/* Form |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		u = (unsigned long int)n;
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits left: square the base */
 			else break;
 		}
 	}
 	return result;
 }
 /* Double-precision complex x**n for an integer exponent, by binary
    exponentiation.  n == 0 gives 1; a negative n is evaluated as (1/x)**|n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result=1.0; unsigned long int u;
 	if(n != 0) {
 		/* Form |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		u = (unsigned long int)n;
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits left: square the base */
 			else break;
 		}
 	}
 	return result;
 }
 /* Integer power x**n in integer arithmetic:
      n == 0             -> 1
      n <  0, x ==  1    -> 1
      n <  0, x == -1    -> (-1)**|n|, i.e. +1 or -1
      n <  0, x ==  0    -> evaluates the integer division 1/0 (see below)
      n <  0, otherwise  -> 0
      n >  0             -> x**n by binary exponentiation */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1; unsigned long int u;
 	if (n == 0 || (n < 0 && x == 1)) return 1;
 	if (n < 0 && x != -1) {
 		/* NOTE(review): for x == 0 this still divides by zero, as the
 		   code always has -- presumably on purpose, so that
 		   0**(negative) faults instead of returning a value; confirm. */
 		return x == 0 ? 1/x : 0;
 	}
 	/* Here n > 0, or x == -1 with n < 0.  Form |n| in unsigned arithmetic:
 	   negating the most negative integer as a signed value would overflow. */
 	u = (unsigned long int)n;
 	if (n < 0) u = 0UL - u;
 	for( ; ; ) {
 		if(u & 01) result *= x;	/* current exponent bit is set */
 		if(u >>= 1) x *= x;	/* more bits left: square the base */
 		else break;
 	}
 	return result;
 }
 /* Position of the largest of w[s-1] .. w[e-1], counted from 1 at w[s-1].
    Ties keep the earliest position.  w[s-1] is read even when e < s, and the
    argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s, k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the largest of w[s-1] .. w[e-1], counted from 1 at w[s-1].
    Ties keep the earliest position.  w[s-1] is read even when e < s, and the
    argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s, k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum over k of conj(x_k) * y_k for *n_
    elements, taken from x with stride *incx_ and from y with stride *incy_.
    A negative stride walks its vector backwards starting from element
    (1-n)*inc, as in the reference BLAS, so no element before the start of
    the array is touched.  *z is set to 0 when *n_ <= 0. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix = 0, iy = 0;
 	_Complex float acc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i=0;i<n;i++) { /* acc = acc + conjg(x(ix)) * y(iy) */
 			acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product: *z = sum over k of conj(x_k) * y_k for *n_
    elements, taken from x with stride *incx_ and from y with stride *incy_.
    A negative stride walks its vector backwards starting from element
    (1-n)*inc, as in the reference BLAS, so no element before the start of
    the array is touched.  *z is set to 0 when *n_ <= 0. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix = 0, iy = 0;
 	_Complex double acc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i=0;i<n;i++) { /* acc = acc + dconjg(x(ix)) * y(iy) */
 			acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over k of x_k * y_k for *n_ elements,
    taken from x with stride *incx_ and from y with stride *incy_.
    A negative stride walks its vector backwards starting from element
    (1-n)*inc, as in the reference BLAS, so no element before the start of
    the array is touched.  *z is set to 0 when *n_ <= 0. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix = 0, iy = 0;
 	_Complex float acc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i=0;i<n;i++) { /* acc = acc + x(ix) * y(iy), no conjugation */
 			acc += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over k of x_k * y_k for *n_ elements,
    taken from x with stride *incx_ and from y with stride *incy_.
    A negative stride walks its vector backwards starting from element
    (1-n)*inc, as in the reference BLAS, so no element before the start of
    the array is touched.  *z is set to 0 when *n_ <= 0. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix = 0, iy = 0;
 	_Complex double acc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i=0;i<n;i++) { /* acc = acc + x(ix) * y(iy), no conjugation */
 			acc += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SGGBAK */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGGBAK + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggbak.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggbak.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggbak.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, */
 /*                          LDV, INFO ) */

 /*       CHARACTER          JOB, SIDE */
 /*       INTEGER            IHI, ILO, INFO, LDV, M, N */
 /*       REAL               LSCALE( * ), RSCALE( * ), V( LDV, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGGBAK forms the right or left eigenvectors of a real generalized */
 /* > eigenvalue problem A*x = lambda*B*x, by backward transformation on */
 /* > the computed eigenvectors of the balanced pair of matrices output by */
 /* > SGGBAL. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOB */
 /* > \verbatim */
 /* >          JOB is CHARACTER*1 */
 /* >          Specifies the type of backward transformation required: */
 /* >          = 'N':  do nothing, return immediately; */
 /* >          = 'P':  do backward transformation for permutation only; */
 /* >          = 'S':  do backward transformation for scaling only; */
 /* >          = 'B':  do backward transformations for both permutation and */
 /* >                  scaling. */
 /* >          JOB must be the same as the argument JOB supplied to SGGBAL. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] SIDE */
 /* > \verbatim */
 /* >          SIDE is CHARACTER*1 */
 /* >          = 'R':  V contains right eigenvectors; */
 /* >          = 'L':  V contains left eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of rows of the matrix V.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ILO */
 /* > \verbatim */
 /* >          ILO is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IHI */
 /* > \verbatim */
 /* >          IHI is INTEGER */
 /* >          The integers ILO and IHI determined by SGGBAL. */
 /* >          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LSCALE */
 /* > \verbatim */
 /* >          LSCALE is REAL array, dimension (N) */
 /* >          Details of the permutations and/or scaling factors applied */
 /* >          to the left side of A and B, as returned by SGGBAL. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] RSCALE */
 /* > \verbatim */
 /* >          RSCALE is REAL array, dimension (N) */
 /* >          Details of the permutations and/or scaling factors applied */
 /* >          to the right side of A and B, as returned by SGGBAL. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of columns of the matrix V.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] V */
 /* > \verbatim */
 /* >          V is REAL array, dimension (LDV,M) */
 /* >          On entry, the matrix of right or left eigenvectors to be */
 /* >          transformed, as returned by STGEVC. */
 /* >          On exit, V is overwritten by the transformed eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDV */
 /* > \verbatim */
 /* >          LDV is INTEGER */
 /* >          The leading dimension of the matrix V. LDV >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGBcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  See R.C. Ward, Balancing the generalized eigenvalue problem, */
 /* >                 SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int sggbak_(char *job, char *side, integer *n, integer *ilo, 
 	integer *ihi, real *lscale, real *rscale, integer *m, real *v, 
 	integer *ldv, integer *info)
 {
    /* Routines defined elsewhere: option-letter comparison, row scaling, */
    /* row swapping and the error reporter. */
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
 	    integer *), xerbla_(char *, integer *, ftnlen);

    /* Local variables */
    integer ld, last, row, other, argno;
    logical want_right, want_left;

 /*     Shift the array bases so that 1-based subscripts can be used: */
 /*     lscale[i], rscale[i], and v[i + j*ld] for element (i,j) of V. */

    --lscale;
    --rscale;
    ld = *ldv;
    v -= 1 + ld;

    want_right = lsame_(side, "R");
    want_left = lsame_(side, "L");

 /*     Check the arguments; INFO = -i flags the first bad argument i. */

    *info = 0;
    if (! (lsame_(job, "N") || lsame_(job, "P") || lsame_(job, "S") || 
 	    lsame_(job, "B"))) {
 	*info = -1;
    } else if (! (want_right || want_left)) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*ilo < 1 || (*n == 0 && *ihi == 0 && *ilo != 1)) {
 	*info = -4;
    } else if ((*n > 0 && (*ihi < *ilo || *ihi > f2cmax(1,*n))) || (*n == 0 
 	    && *ilo == 1 && *ihi != 0)) {
 	*info = -5;
    } else if (*m < 0) {
 	*info = -8;
    } else if (*ldv < f2cmax(1,*n)) {
 	*info = -10;
    }
    if (*info != 0) {
 	argno = -(*info);
 	xerbla_("SGGBAK", &argno, (ftnlen)6);
 	return 0;
    }

 /*     Nothing to do for an empty V or for JOB = 'N'. */

    if (*n == 0 || *m == 0 || lsame_(job, "N")) {
 	return 0;
    }

 /*     Scaling step (JOB = 'S' or 'B'), skipped when ILO = IHI: */
 /*     multiply each of rows ILO..IHI of V by its scale factor. */

    if (*ilo != *ihi && (lsame_(job, "S") || lsame_(job, "B"))) {

 /*        Right eigenvectors use RSCALE */

 	if (want_right) {
 	    last = *ihi;
 	    for (row = *ilo; row <= last; ++row) {
 		sscal_(m, &rscale[row], &v[row + ld], ldv);
 	    }
 	}

 /*        Left eigenvectors use LSCALE */

 	if (want_left) {
 	    last = *ihi;
 	    for (row = *ilo; row <= last; ++row) {
 		sscal_(m, &lscale[row], &v[row + ld], ldv);
 	    }
 	}
    }

 /*     Permutation step (JOB = 'P' or 'B'): for every row outside */
 /*     ILO..IHI the scale array holds the index of a partner row; the */
 /*     two rows are swapped unless the partner is the row itself. */
 /*     Rows ILO-1 down to 1 are handled first, then rows IHI+1 up to N. */

    if (lsame_(job, "P") || lsame_(job, "B")) {

 /*        Right eigenvectors use RSCALE */

 	if (want_right) {
 	    if (*ilo != 1) {
 		for (row = *ilo - 1; row >= 1; --row) {
 		    other = (integer) rscale[row];
 		    if (other != row) {
 			sswap_(m, &v[row + ld], ldv, &v[other + ld], ldv);
 		    }
 		}
 	    }
 	    if (*ihi != *n) {
 		last = *n;
 		for (row = *ihi + 1; row <= last; ++row) {
 		    other = (integer) rscale[row];
 		    if (other != row) {
 			sswap_(m, &v[row + ld], ldv, &v[other + ld], ldv);
 		    }
 		}
 	    }
 	}

 /*        Left eigenvectors use LSCALE */

 	if (want_left) {
 	    if (*ilo != 1) {
 		for (row = *ilo - 1; row >= 1; --row) {
 		    other = (integer) lscale[row];
 		    if (other != row) {
 			sswap_(m, &v[row + ld], ldv, &v[other + ld], ldv);
 		    }
 		}
 	    }
 	    if (*ihi != *n) {
 		last = *n;
 		for (row = *ihi + 1; row <= last; ++row) {
 		    other = (integer) lscale[row];
 		    if (other != row) {
 			sswap_(m, &v[row + ld], ldv, &v[other + ld], ldv);
 		    }
 		}
 	    }
 	}
    }

    return 0;

 /*     End of SGGBAK */

 } /* sggbak_ */

--- a/lapack-netlib/SRC/sggbal.c
+++ b/lapack-netlib/SRC/sggbal.c
--- a/lapack-netlib/SRC/sgges.c
+++ b/lapack-netlib/SRC/sgges.c
--- a/lapack-netlib/SRC/sgges3.c
+++ b/lapack-netlib/SRC/sgges3.c
--- a/lapack-netlib/SRC/sggesx.c
+++ b/lapack-netlib/SRC/sggesx.c
--- a/lapack-netlib/SRC/sggev.c
+++ b/lapack-netlib/SRC/sggev.c
--- a/lapack-netlib/SRC/sggev3.c
+++ b/lapack-netlib/SRC/sggev3.c
--- a/lapack-netlib/SRC/sggevx.c
+++ b/lapack-netlib/SRC/sggevx.c
--- a/lapack-netlib/SRC/sggglm.c
+++ b/lapack-netlib/SRC/sggglm.c
@@ -0,0 +1,787 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Fortran scalar types as used by the translated code below. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers are plain (real, imaginary) pairs. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd build a C99 complex VALUE from the pair pointed to by z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd reinterpret the pair's address as a pointer to a C99 complex
    object.  NOTE(review): this assumes the struct has the same layout as the
    corresponding _Complex type (no padding between r and i) -- confirm. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf(z)/pCd(z) are lvalues: assigning to them stores into *z. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Fortran LOGICAL and small-integer types. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Values of the logical constants. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 /* Defaults to plain "extern" unless the includer defined it beforehand. */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */
 /* Parameter blocks, one struct per kind of Fortran I/O statement (see the
    comment above each).  NOTE(review): nothing in the code visible around
    this header uses them; they appear to be kept only so that the header
    stays a complete f2c.h -- confirm before removing anything. */

 typedef int flag;	/* integer flag type used by the structs below */
 typedef int ftnlen;	/* string length type used by the structs below */
 typedef int ftnint;	/* general integer type used by the structs below */

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Character-valued members are each paired with an ftnlen/ftnint length
    member; the remaining members are pointers to integers. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 /* Spelling of "void" used by f2c-generated code. */
 #define VOID void

 /* One member per scalar or complex type defined above, overlaid in the same
    storage. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Describes one variable: its name, its address, a pointer to dimension
    data and an integer type code.  NOTE(review): the encoding of dims and
    type is not visible in this file -- confirm against the f2c runtime. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Generic arithmetic helpers.  These are function-like macros and may
    evaluate an argument more than once, so arguments must be free of side
    effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Test, clear or set bit number b of a. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro versions of the f2c runtime helpers.  Unless noted otherwise the
    operands are POINTERS to the values.  For the c_, z_ and pow_?i/pow_zz
    families that expand to a braced statement, the first argument points to
    the complex result. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* Truncation toward zero. */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* Base-10 logarithm; the constant is log10(e). */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* u_nint and u_sign take VALUES: nearest integer with halves rounded away
    from zero, and |a| carrying the sign of b. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Powers.  The *_ui workers are the static functions defined further down
    in this header. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* Both operands are doublecomplex pointers, so both go through Cd();
    handing the bare struct *(B) to cpow() does not compile. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 /* Concatenate *np strings rpp[i] (lengths rnp[i]) into lpp, which has room
    for llp characters; input is truncated to fit and the remainder of lpp is
    filled with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* Compare only the first min(c,d) characters. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* Copy at most min(C,D) characters, stopping early at a NUL in B; the
    destination is neither blank-padded nor NUL-terminated. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* Both arguments are ignored: sig_die exits with status 1, s_stop with 0. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shorthands; usable only inside a loop body. */
 #define myexit_() break;
 #define mycycle() continue;
 /* These three yield a value, so they expand to parenthesised expressions;
    a braced body such as {ceil(w)} cannot appear inside an expression. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning logical.  The argument list is left
    unspecified ("..." in C++, empty parentheses in C), so the compiler does
    not check the arguments of calls made through it. */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Single-precision x**n for an integer exponent, by binary exponentiation.
    n == 0 gives 1 for every x; a negative n is evaluated as (1/x)**|n|. */
 static float spow_ui(float x, integer n) {
 	float result=1.0; unsigned long int u;
 	if(n != 0) {
 		/* Form |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		u = (unsigned long int)n;
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits left: square the base */
 			else break;
 		}
 	}
 	return result;
 }
 /* Double-precision x**n for an integer exponent, by binary exponentiation.
    n == 0 gives 1 for every x; a negative n is evaluated as (1/x)**|n|. */
 static double dpow_ui(double x, integer n) {
 	double result=1.0; unsigned long int u;
 	if(n != 0) {
 		/* Form |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		u = (unsigned long int)n;
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits left: square the base */
 			else break;
 		}
 	}
 	return result;
 }
 /* Single-precision complex x**n for an integer exponent, by binary
    exponentiation.  n == 0 gives 1; a negative n is evaluated as (1/x)**|n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result=1.0; unsigned long int u;
 	if(n != 0) {
 		/* Form |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		u = (unsigned long int)n;
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits left: square the base */
 			else break;
 		}
 	}
 	return result;
 }
 /* Double-precision complex x**n for an integer exponent, by binary
    exponentiation.  n == 0 gives 1; a negative n is evaluated as (1/x)**|n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result=1.0; unsigned long int u;
 	if(n != 0) {
 		/* Form |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		u = (unsigned long int)n;
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* current exponent bit is set */
 			if(u >>= 1) x *= x;	/* more bits left: square the base */
 			else break;
 		}
 	}
 	return result;
 }
 /* Integer power x**n in integer arithmetic:
      n == 0             -> 1
      n <  0, x ==  1    -> 1
      n <  0, x == -1    -> (-1)**|n|, i.e. +1 or -1
      n <  0, x ==  0    -> evaluates the integer division 1/0 (see below)
      n <  0, otherwise  -> 0
      n >  0             -> x**n by binary exponentiation */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1; unsigned long int u;
 	if (n == 0 || (n < 0 && x == 1)) return 1;
 	if (n < 0 && x != -1) {
 		/* NOTE(review): for x == 0 this still divides by zero, as the
 		   code always has -- presumably on purpose, so that
 		   0**(negative) faults instead of returning a value; confirm. */
 		return x == 0 ? 1/x : 0;
 	}
 	/* Here n > 0, or x == -1 with n < 0.  Form |n| in unsigned arithmetic:
 	   negating the most negative integer as a signed value would overflow. */
 	u = (unsigned long int)n;
 	if (n < 0) u = 0UL - u;
 	for( ; ; ) {
 		if(u & 01) result *= x;	/* current exponent bit is set */
 		if(u >>= 1) x *= x;	/* more bits left: square the base */
 		else break;
 	}
 	return result;
 }
 /* Position of the largest of w[s-1] .. w[e-1], counted from 1 at w[s-1].
    Ties keep the earliest position.  w[s-1] is read even when e < s, and the
    argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s, k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the largest of w[s-1] .. w[e-1], counted from 1 at w[s-1].
    Ties keep the earliest position.  w[s-1] is read even when e < s, and the
    argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s, k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum over k of conj(x_k) * y_k for *n_
    elements, taken from x with stride *incx_ and from y with stride *incy_.
    A negative stride walks its vector backwards starting from element
    (1-n)*inc, as in the reference BLAS, so no element before the start of
    the array is touched.  *z is set to 0 when *n_ <= 0. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix = 0, iy = 0;
 	_Complex float acc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i=0;i<n;i++) { /* acc = acc + conjg(x(ix)) * y(iy) */
 			acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product: *z = sum over k of conj(x_k) * y_k for *n_
    elements, taken from x with stride *incx_ and from y with stride *incy_.
    A negative stride walks its vector backwards starting from element
    (1-n)*inc, as in the reference BLAS, so no element before the start of
    the array is touched.  *z is set to 0 when *n_ <= 0. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix = 0, iy = 0;
 	_Complex double acc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i=0;i<n;i++) { /* acc = acc + dconjg(x(ix)) * y(iy) */
 			acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over k of x_k * y_k for *n_ elements,
    taken from x with stride *incx_ and from y with stride *incy_.
    A negative stride walks its vector backwards starting from element
    (1-n)*inc, as in the reference BLAS, so no element before the start of
    the array is touched.  *z is set to 0 when *n_ <= 0. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix = 0, iy = 0;
 	_Complex float acc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i=0;i<n;i++) { /* acc = acc + x(ix) * y(iy), no conjugation */
 			acc += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over k of x_k * y_k for *n_ elements,
    taken from x with stride *incx_ and from y with stride *incy_.
    A negative stride walks its vector backwards starting from element
    (1-n)*inc, as in the reference BLAS, so no element before the start of
    the array is touched.  *z is set to 0 when *n_ <= 0. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix = 0, iy = 0;
 	_Complex double acc = 0.0;
 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 		for (i=0;i<n;i++) { /* acc = acc + x(ix) * y(iy), no conjugation */
 			acc += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* File-scope objects holding the literal values 1, -1, -1.0f and 1.0f. */
 /* NOTE(review): the routine that uses them lies outside this excerpt; */
 /* presumably they exist so the literals can be passed by address to */
 /* Fortran-style callees -- confirm. */
 static integer c__1 = 1;
 static integer c_n1 = -1;
 static real c_b32 = -1.f;
 static real c_b34 = 1.f;

 /* > \brief \b SGGGLM */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGGGLM + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggglm.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggglm.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggglm.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, */
 /*                          INFO ) */

 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, P */
 /*       REAL               A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), */
 /*      $                   X( * ), Y( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGGGLM solves a general Gauss-Markov linear model (GLM) problem: */
 /* > */
 /* >         minimize || y ||_2   subject to   d = A*x + B*y */
 /* >             x */
 /* > */
 /* > where A is an N-by-M matrix, B is an N-by-P matrix, and d is a */
 /* > given N-vector. It is assumed that M <= N <= M+P, and */
 /* > */
 /* >            rank(A) = M    and    rank( A B ) = N. */
 /* > */
 /* > Under these assumptions, the constrained equation is always */
 /* > consistent, and there is a unique solution x and a minimal 2-norm */
 /* > solution y, which is obtained using a generalized QR factorization */
 /* > of the matrices (A, B) given by */
 /* > */
 /* >    A = Q*(R),   B = Q*T*Z. */
 /* >          (0) */
 /* > */
 /* > In particular, if matrix B is square nonsingular, then the problem */
 /* > GLM is equivalent to the following weighted linear least squares */
 /* > problem */
 /* > */
 /* >              minimize || inv(B)*(d-A*x) ||_2 */
 /* >                  x */
 /* > */
 /* > where inv(B) denotes the inverse of B. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of rows of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of columns of the matrix A.  0 <= M <= N. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] P */
 /* > \verbatim */
 /* >          P is INTEGER */
 /* >          The number of columns of the matrix B.  P >= N-M. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,M) */
 /* >          On entry, the N-by-M matrix A. */
 /* >          On exit, the upper triangular part of the array A contains */
 /* >          the M-by-M upper triangular matrix R. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,P) */
 /* >          On entry, the N-by-P matrix B. */
 /* >          On exit, if N <= P, the upper triangle of the subarray */
 /* >          B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */
 /* >          if N > P, the elements on and above the (N-P)th subdiagonal */
 /* >          contain the N-by-P upper trapezoidal matrix T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] D */
 /* > \verbatim */
 /* >          D is REAL array, dimension (N) */
 /* >          On entry, D is the left hand side of the GLM equation. */
 /* >          On exit, D is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] X */
 /* > \verbatim */
 /* >          X is REAL array, dimension (M) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Y */
 /* > \verbatim */
 /* >          Y is REAL array, dimension (P) */
 /* > */
 /* >          On exit, X and Y are the solutions of the GLM problem. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. LWORK >= max(1,N+M+P). */
 /* >          For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB, */
 /* >          where NB is an upper bound for the optimal blocksizes for */
 /* >          SGEQRF, SGERQF, SORMQR and SORMRQ. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          = 1:  the upper triangular factor R associated with A in the */
 /* >                generalized QR factorization of the pair (A, B) is */
 /* >                singular, so that rank(A) < M; the least squares */
 /* >                solution could not be computed. */
 /* >          = 2:  the bottom (N-M) by (N-M) part of the upper trapezoidal */
 /* >                factor T associated with B in the generalized QR */
 /* >                factorization of the pair (A, B) is singular, so that */
 /* >                rank( A B ) < N; the least squares solution could not */
 /* >                be computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realOTHEReigen */

 /*  ===================================================================== */
 /*
  * sggglm_ -- f2c translation of the LAPACK driver SGGGLM (general
  * Gauss-Markov linear model: minimize ||y||_2 subject to d = A*x + B*y).
  *
  * Every argument is passed by address.  After the "parameter adjustments"
  * below, the arrays are indexed 1-based and column-major.
  *
  * WORK is partitioned as follows (np = min(n,p)):
  *   work[1 .. m]           first scalar-factor array written by sggqrf_ and
  *                          later handed to sormqr_ together with A
  *   work[m+1 .. m+np]      second scalar-factor array written by sggqrf_ and
  *                          later handed to sormrq_ together with B
  *   work[m+np+1 .. lwork]  scratch space lent to the called routines
  *
  * The function always returns 0; status is reported through *info.
  */
 /* Subroutine */ int sggglm_(integer *n, integer *m, integer *p, real *a, 
 	integer *lda, real *b, integer *ldb, real *d__, real *x, real *y, 
 	real *work, integer *lwork, integer *info)
 {
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;

    /* Local variables */
    integer lopt, i__;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
 	    real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *);
    integer nb, np;
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int sggqrf_(integer *, integer *, integer *, real 
 	    *, integer *, real *, real *, integer *, real *, real *, integer *
 	    , integer *);
    integer lwkmin, nb1, nb2, nb3, nb4, lwkopt;
    logical lquery;
    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
 	    integer *, real *, integer *, real *, real *, integer *, real *, 
 	    integer *, integer *), sormrq_(char *, char *, 
 	    integer *, integer *, integer *, real *, integer *, real *, real *
 	    , integer *, real *, integer *, integer *), 
 	    strtrs_(char *, char *, char *, integer *, integer *, real *, 
 	    integer *, real *, integer *, integer *);


 /*  -- LAPACK driver routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  =================================================================== */


 /*     Test the input parameters */

    /* Parameter adjustments */
    /* Shift each base pointer so that Fortran-style 1-based subscripts
       (a[i + j*a_dim1], d__[i], ...) address the caller's storage. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --d__;
    --x;
    --y;
    --work;

    /* Function Body */
    *info = 0;
    np = f2cmin(*n,*p);
    /* lwork == -1 requests a workspace-size query only. */
    lquery = *lwork == -1;
    /* Negative info values are the 1-based position of the offending
       argument in the parameter list. */
    if (*n < 0) {
 	*info = -1;
    } else if (*m < 0 || *m > *n) {
 	*info = -2;
    } else if (*p < 0 || *p < *n - *m) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -7;
    }

 /*     Calculate workspace */

    if (*info == 0) {
 	if (*n == 0) {
 	    lwkmin = 1;
 	    lwkopt = 1;
 	} else {
 	    /* Ask ilaenv_ for the block size of each routine used below and
 	       size the optimal workspace for the largest of them. */
 	    nb1 = ilaenv_(&c__1, "SGEQRF", " ", n, m, &c_n1, &c_n1, (ftnlen)6,
 		     (ftnlen)1);
 	    nb2 = ilaenv_(&c__1, "SGERQF", " ", n, m, &c_n1, &c_n1, (ftnlen)6,
 		     (ftnlen)1);
 	    nb3 = ilaenv_(&c__1, "SORMQR", " ", n, m, p, &c_n1, (ftnlen)6, (
 		    ftnlen)1);
 	    nb4 = ilaenv_(&c__1, "SORMRQ", " ", n, m, p, &c_n1, (ftnlen)6, (
 		    ftnlen)1);
 /* Computing MAX */
 	    i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3);
 	    nb = f2cmax(i__1,nb4);
 	    lwkmin = *m + *n + *p;
 	    lwkopt = *m + np + f2cmax(*n,*p) * nb;
 	}
 	/* Reported even on a pure query, so the caller can read it back. */
 	work[1] = (real) lwkopt;

 	if (*lwork < lwkmin && ! lquery) {
 	    *info = -12;
 	}
    }

    if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("SGGGLM", &i__1, (ftnlen)6);
 	return 0;
    } else if (lquery) {
 	return 0;
    }

 /*     Quick return if possible */

    /* With no equations, both solution vectors are simply zeroed. */
    if (*n == 0) {
 	i__1 = *m;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    x[i__] = 0.f;
 	}
 	i__1 = *p;
 	for (i__ = 1; i__ <= i__1; ++i__) {
 	    y[i__] = 0.f;
 	}
 	return 0;
    }

 /*     Compute the GQR factorization of matrices A and B: */

 /*          Q**T*A = ( R11 ) M,    Q**T*B*Z**T = ( T11   T12 ) M */
 /*                   (  0  ) N-M                 (  0    T22 ) N-M */
 /*                      M                         M+P-N  N-M */

 /*     where R11 and T22 are upper triangular, and Q and Z are */
 /*     orthogonal. */

    /* i__1 = length of the scratch tail of WORK (see layout above). */
    i__1 = *lwork - *m - np;
    sggqrf_(n, m, p, &a[a_offset], lda, &work[1], &b[b_offset], ldb, &work[*m 
 	    + 1], &work[*m + np + 1], &i__1, info);
    /* The first scratch element is read back as a workspace size
       (presumably the callee's optimal LWORK -- confirm against SGGQRF);
       the real value is truncated by the implicit conversion to integer.
       NOTE(review): *info from sggqrf_ is not inspected here. */
    lopt = work[*m + np + 1];

 /*     Update left-hand-side vector d = Q**T*d = ( d1 ) M */
 /*                                               ( d2 ) N-M */

    i__1 = f2cmax(1,*n);
    i__2 = *lwork - *m - np;
    sormqr_("Left", "Transpose", n, &c__1, m, &a[a_offset], lda, &work[1], &
 	    d__[1], &i__1, &work[*m + np + 1], &i__2, info);
 /* Computing MAX */
    i__1 = lopt, i__2 = (integer) work[*m + np + 1];
    lopt = f2cmax(i__1,i__2);

 /*     Solve T22*y2 = d2 for y2 */

    if (*n > *m) {
 	i__1 = *n - *m;
 	i__2 = *n - *m;
 	/* T22 is the (n-m)-by-(n-m) block of B starting at row m+1,
 	   column m+p-n+1; d2 = d(m+1:n) is overwritten with y2. */
 	strtrs_("Upper", "No transpose", "Non unit", &i__1, &c__1, &b[*m + 1 
 		+ (*m + *p - *n + 1) * b_dim1], ldb, &d__[*m + 1], &i__2, 
 		info);

 	/* NOTE(review): this path (singular T22, the factor of B) reports
 	   INFO = 1, whereas the header documentation assigns INFO = 1 to a
 	   singular R (the factor of A) and INFO = 2 to singular T22.
 	   Confirm which is intended before relying on the code value. */
 	if (*info > 0) {
 	    *info = 1;
 	    return 0;
 	}

 	/* Place y2 in the trailing n-m entries of y. */
 	i__1 = *n - *m;
 	scopy_(&i__1, &d__[*m + 1], &c__1, &y[*m + *p - *n + 1], &c__1);
    }

 /*     Set y1 = 0 */

    i__1 = *m + *p - *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
 	y[i__] = 0.f;
 /* L10: */
    }

 /*     Update d1 = d1 - T12*y2 */

    /* T12 is the m-by-(n-m) block of B starting at row 1, column m+p-n+1. */
    i__1 = *n - *m;
    sgemv_("No transpose", m, &i__1, &c_b32, &b[(*m + *p - *n + 1) * b_dim1 + 
 	    1], ldb, &y[*m + *p - *n + 1], &c__1, &c_b34, &d__[1], &c__1);

 /*     Solve triangular system: R11*x = d1 */

    if (*m > 0) {
 	strtrs_("Upper", "No Transpose", "Non unit", m, &c__1, &a[a_offset], 
 		lda, &d__[1], m, info);

 	/* NOTE(review): singular R11 (the factor of A) reports INFO = 2
 	   here; the header documentation describes that condition under
 	   INFO = 1.  See the matching note at the T22 solve above. */
 	if (*info > 0) {
 	    *info = 2;
 	    return 0;
 	}

 /*        Copy D to X */

 	scopy_(m, &d__[1], &c__1, &x[1], &c__1);
    }

 /*     Backward transformation y = Z**T *y */

 /* Computing MAX */
    /* The B argument starts at row max(1, n-p+1), column 1. */
    i__1 = 1, i__2 = *n - *p + 1;
    i__3 = f2cmax(1,*p);
    i__4 = *lwork - *m - np;
    sormrq_("Left", "Transpose", p, &c__1, &np, &b[f2cmax(i__1,i__2) + b_dim1], 
 	    ldb, &work[*m + 1], &y[1], &i__3, &work[*m + np + 1], &i__4, info);
 /* Computing MAX */
    /* Report m + np plus the largest scratch size seen during the run. */
    i__1 = lopt, i__2 = (integer) work[*m + np + 1];
    work[1] = (real) (*m + np + f2cmax(i__1,i__2));

    return 0;

 /*     End of SGGGLM */

 } /* sggglm_ */

--- a/lapack-netlib/SRC/sgghd3.c
+++ b/lapack-netlib/SRC/sgghd3.c
--- a/lapack-netlib/SRC/sgghrd.c
+++ b/lapack-netlib/SRC/sgghrd.c
@@ -0,0 +1,784 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* C spellings of the Fortran scalar types used by the translated code. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers are plain (real, imaginary) pairs.  The name `complex`
    is reused for the struct, which is why the <complex.h> macro of the same
    name is #undef'd before this point. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd: load a struct-style complex into a native C99 complex value. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd: reinterpret a struct-style complex as a native complex
    object so it can be assigned through (assumes the two-member struct and
    the native complex type share the same layout -- TODO confirm for the
    target ABI). */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf(z) / pCd(z): lvalue view of *z as a native complex number. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Control-block layouts, one per Fortran I/O statement kind as labelled
    below.  Neither the helper functions nor the sgghrd_ routine later in
    this file reference them; they are carried along with this copy of
    f2c.h. */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Most string members are paired with a length member named <member>len. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* One storage cell able to hold any of the scalar result types. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of one variable inside a Namelist group. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Macro replacements for the Fortran intrinsics and libf2c helpers that the
    translated code calls.  Most take POINTERS to their operands (note the
    *(x) dereferences), mirroring the by-reference calling convention. */

 /* Caution: abs, f2cmin and f2cmax evaluate their arguments more than once,
    so arguments must be free of side effects.  abs also replaces the
    <stdlib.h> function of the same name for the rest of the file. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Complex helpers: the brace-wrapped forms are statements that store the
    result through R (or c / p); the parenthesised forms are expressions.
    Note that c_cos, r_cnjg and r_imag call the double-precision functions
    (ccos, conj, cimag) on single-precision operands. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* d_int / r_int: truncate toward zero. */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* The constant is log10(e), so these compute log10 via the natural log. */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* u_nint: round half away from zero.  u_sign: |a| carrying the sign of b
    (b >= 0 counts as positive). */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Integer-exponent powers forward to the static *pow_ui helpers defined
    further down in this header. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* s_cat: concatenate *np source strings rpp[i] (lengths rnp[i]) into the
    llp-byte buffer lpp, truncating at llp and blank-padding the remainder. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp compares only the first min(c,d) characters; s_copy copies at most
    min(C,D) characters, stops at a NUL in B and does not pad A. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* Both ignore their arguments: sig_die exits with status 1, s_stop with 0. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Version marker string; nothing in this header reads it. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): the next three expand to a brace-enclosed expression with
    no terminating semicolon, so they are only valid where a braced scalar
    initializer is accepted; mymaxloc always calls the double-precision
    dmaxloc_.  Confirm intended usage before relying on them. */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning `logical`.  The C form leaves the
    parameter list unspecified (empty parentheses); the C++ form uses an
    ellipsis instead. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /*
  * spow_ui -- raise a float to an integer power by binary exponentiation.
  *
  * x: base.  n: exponent of either sign; a negative n yields (1/x)^|n|.
  * Returns 1 when n == 0, whatever x is.
  */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 1UL) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;	/* square only while higher exponent bits remain */
 	}
 	return result;
 }
 /*
  * dpow_ui -- raise a double to an integer power by binary exponentiation.
  *
  * x: base.  n: exponent of either sign; a negative n yields (1/x)^|n|.
  * Returns 1 when n == 0, whatever x is.
  */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 1UL) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;	/* square only while higher exponent bits remain */
 	}
 	return result;
 }
 /*
  * cpow_ui -- raise a single-precision complex number to an integer power by
  * binary exponentiation.
  *
  * x: base.  n: exponent of either sign; a negative n yields (1/x)^|n|.
  * Returns 1 when n == 0, whatever x is.
  */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 1UL) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;	/* square only while higher exponent bits remain */
 	}
 	return result;
 }
 /*
  * zpow_ui -- raise a double-precision complex number to an integer power by
  * binary exponentiation.
  *
  * x: base.  n: exponent of either sign; a negative n yields (1/x)^|n|.
  * Returns 1 when n == 0, whatever x is.
  */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 1UL) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;	/* square only while higher exponent bits remain */
 	}
 	return result;
 }
 /*
  * pow_ii -- integer base raised to an integer power (Fortran I**J).
  *
  * n == 0 or x == 1      -> 1
  * n < 0 and x == -1     -> +1 or -1 according to the parity of n
  * n < 0 and |x| > 1     -> 0 (the exact result truncates toward zero)
  * n < 0 and x == 0      -> evaluates 1/x exactly as the original did, i.e.
  *                          an integer division by zero
  * n > 0                 -> x multiplied out by binary exponentiation
  */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int bits;
 	if (n == 0 || x == 1) return 1;
 	if (n < 0) {
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* x == -1: take |n| in unsigned arithmetic, since negating the
 		   most negative integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 1UL) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		x *= x;	/* square only while higher exponent bits remain */
 	}
 	return result;
 }
 /*
  * dmaxloc_ -- position of the first maximum of w(s:e), both bounds 1-based.
  *
  * The returned position is relative to s (1 means w(s) itself).  Only a
  * strictly greater value replaces the current maximum.  n is not used.
  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best_pos = s;
 	integer k;
 	double best_val = w[s - 1];
 	(void) n;
 	for (k = s + 1; k <= e; k++) {
 		if (w[k - 1] > best_val) {
 			best_pos = k;
 			best_val = w[k - 1];
 		}
 	}
 	return best_pos - s + 1;
 }
 /*
  * smaxloc_ -- position of the first maximum of w(s:e), both bounds 1-based.
  *
  * The returned position is relative to s (1 means w(s) itself).  Only a
  * strictly greater value replaces the current maximum.  n is not used.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best_pos = s;
 	integer k;
 	float best_val = w[s - 1];
 	(void) n;
 	for (k = s + 1; k <= e; k++) {
 		if (w[k - 1] > best_val) {
 			best_pos = k;
 			best_val = w[k - 1];
 		}
 	}
 	return best_pos - s + 1;
 }
 /*
  * cdotc_ -- conjugated dot product of two single-precision complex vectors.
  *
  * Stores  sum_{k=0}^{n-1} conj(x_k) * y_k  into *z, where x_k and y_k are the
  * k-th logical elements of x and y taken with strides *incx_ and *incy_.
  * The result is 0 when *n_ <= 0.
  *
  * Following the reference BLAS convention, a negative increment traverses the
  * vector backwards: logical element 0 then sits at offset (1-n)*inc, so every
  * access stays inside the caller's array instead of indexing before it.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	/* Offset of logical element 0; non-zero only for backward traversal. */
 	const integer x0 = (n > 0 && incx < 0) ? (1 - n) * incx : 0;
 	const integer y0 = (n > 0 && incy < 0) ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	integer i;
 	for (i = 0; i < n; i++) { /* acc = acc + conjg(x(i)) * y(i) */
 		acc += conjf(Cf(&x[x0 + i * incx])) * Cf(&y[y0 + i * incy]);
 	}
 	pCf(z) = acc;
 }
 /*
  * zdotc_ -- conjugated dot product of two double-precision complex vectors.
  *
  * Stores  sum_{k=0}^{n-1} conj(x_k) * y_k  into *z, where x_k and y_k are the
  * k-th logical elements of x and y taken with strides *incx_ and *incy_.
  * The result is 0 when *n_ <= 0.
  *
  * Following the reference BLAS convention, a negative increment traverses the
  * vector backwards: logical element 0 then sits at offset (1-n)*inc, so every
  * access stays inside the caller's array instead of indexing before it.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	/* Offset of logical element 0; non-zero only for backward traversal. */
 	const integer x0 = (n > 0 && incx < 0) ? (1 - n) * incx : 0;
 	const integer y0 = (n > 0 && incy < 0) ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	integer i;
 	for (i = 0; i < n; i++) { /* acc = acc + dconjg(x(i)) * y(i) */
 		acc += conj(Cd(&x[x0 + i * incx])) * Cd(&y[y0 + i * incy]);
 	}
 	pCd(z) = acc;
 }
 /*
  * cdotu_ -- unconjugated dot product of two single-precision complex vectors.
  *
  * Stores  sum_{k=0}^{n-1} x_k * y_k  (no conjugation) into *z, where x_k and
  * y_k are the k-th logical elements of x and y taken with strides *incx_ and
  * *incy_.  The result is 0 when *n_ <= 0.
  *
  * Following the reference BLAS convention, a negative increment traverses the
  * vector backwards: logical element 0 then sits at offset (1-n)*inc, so every
  * access stays inside the caller's array instead of indexing before it.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	/* Offset of logical element 0; non-zero only for backward traversal. */
 	const integer x0 = (n > 0 && incx < 0) ? (1 - n) * incx : 0;
 	const integer y0 = (n > 0 && incy < 0) ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	integer i;
 	for (i = 0; i < n; i++) { /* acc = acc + x(i) * y(i) */
 		acc += Cf(&x[x0 + i * incx]) * Cf(&y[y0 + i * incy]);
 	}
 	pCf(z) = acc;
 }
 /*
  * zdotu_ -- unconjugated dot product of two double-precision complex vectors.
  *
  * Stores  sum_{k=0}^{n-1} x_k * y_k  (no conjugation) into *z, where x_k and
  * y_k are the k-th logical elements of x and y taken with strides *incx_ and
  * *incy_.  The result is 0 when *n_ <= 0.
  *
  * Following the reference BLAS convention, a negative increment traverses the
  * vector backwards: logical element 0 then sits at offset (1-n)*inc, so every
  * access stays inside the caller's array instead of indexing before it.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	/* Offset of logical element 0; non-zero only for backward traversal. */
 	const integer x0 = (n > 0 && incx < 0) ? (1 - n) * incx : 0;
 	const integer y0 = (n > 0 && incy < 0) ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	integer i;
 	for (i = 0; i < n; i++) { /* acc = acc + x(i) * y(i) */
 		acc += Cd(&x[x0 + i * incx]) * Cd(&y[y0 + i * incy]);
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* Constants live in static storage because every callee takes its
    arguments by address. */
 /* c_b10 / c_b11: the two scalar values handed to slaset_ when Q or Z is
    initialised (COMPQ / COMPZ = 'I', documented as the unit matrix). */
 static real c_b10 = 0.f;
 static real c_b11 = 1.f;
 /* c__1: unit increment for the column-wise srot_ calls. */
 static integer c__1 = 1;

 /* > \brief \b SGGHRD */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGGHRD + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgghrd.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgghrd.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgghrd.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, */
 /*                          LDQ, Z, LDZ, INFO ) */

 /*       CHARACTER          COMPQ, COMPZ */
 /*       INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N */
 /*       REAL               A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */
 /*      $                   Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGGHRD reduces a pair of real matrices (A,B) to generalized upper */
 /* > Hessenberg form using orthogonal transformations, where A is a */
 /* > general matrix and B is upper triangular.  The form of the */
 /* > generalized eigenvalue problem is */
 /* >    A*x = lambda*B*x, */
 /* > and B is typically made upper triangular by computing its QR */
 /* > factorization and moving the orthogonal matrix Q to the left side */
 /* > of the equation. */
 /* > */
 /* > This subroutine simultaneously reduces A to a Hessenberg matrix H: */
 /* >    Q**T*A*Z = H */
 /* > and transforms B to another upper triangular matrix T: */
 /* >    Q**T*B*Z = T */
 /* > in order to reduce the problem to its standard form */
 /* >    H*y = lambda*T*y */
 /* > where y = Z**T*x. */
 /* > */
 /* > The orthogonal matrices Q and Z are determined as products of Givens */
 /* > rotations.  They may either be formed explicitly, or they may be */
 /* > postmultiplied into input matrices Q1 and Z1, so that */
 /* > */
 /* >      Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T */
 /* > */
 /* >      Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T */
 /* > */
 /* > If Q1 is the orthogonal matrix from the QR factorization of B in the */
 /* > original equation A*x = lambda*B*x, then SGGHRD reduces the original */
 /* > problem to generalized Hessenberg form. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] COMPQ */
 /* > \verbatim */
 /* >          COMPQ is CHARACTER*1 */
 /* >          = 'N': do not compute Q; */
 /* >          = 'I': Q is initialized to the unit matrix, and the */
 /* >                 orthogonal matrix Q is returned; */
 /* >          = 'V': Q must contain an orthogonal matrix Q1 on entry, */
 /* >                 and the product Q1*Q is returned. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] COMPZ */
 /* > \verbatim */
 /* >          COMPZ is CHARACTER*1 */
 /* >          = 'N': do not compute Z; */
 /* >          = 'I': Z is initialized to the unit matrix, and the */
 /* >                 orthogonal matrix Z is returned; */
 /* >          = 'V': Z must contain an orthogonal matrix Z1 on entry, */
 /* >                 and the product Z1*Z is returned. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ILO */
 /* > \verbatim */
 /* >          ILO is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IHI */
 /* > \verbatim */
 /* >          IHI is INTEGER */
 /* > */
 /* >          ILO and IHI mark the rows and columns of A which are to be */
 /* >          reduced.  It is assumed that A is already upper triangular */
 /* >          in rows and columns 1:ILO-1 and IHI+1:N.  ILO and IHI are */
 /* >          normally set by a previous call to SGGBAL; otherwise they */
 /* >          should be set to 1 and N respectively. */
 /* >          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA, N) */
 /* >          On entry, the N-by-N general matrix to be reduced. */
 /* >          On exit, the upper triangle and the first subdiagonal of A */
 /* >          are overwritten with the upper Hessenberg matrix H, and the */
 /* >          rest is set to zero. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A.  LDA >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB, N) */
 /* >          On entry, the N-by-N upper triangular matrix B. */
 /* >          On exit, the upper triangular matrix T = Q**T B Z.  The */
 /* >          elements below the diagonal are set to zero. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] Q */
 /* > \verbatim */
 /* >          Q is REAL array, dimension (LDQ, N) */
 /* >          On entry, if COMPQ = 'V', the orthogonal matrix Q1, */
 /* >          typically from the QR factorization of B. */
 /* >          On exit, if COMPQ='I', the orthogonal matrix Q, and if */
 /* >          COMPQ = 'V', the product Q1*Q. */
 /* >          Not referenced if COMPQ='N'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDQ */
 /* > \verbatim */
 /* >          LDQ is INTEGER */
 /* >          The leading dimension of the array Q. */
 /* >          LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] Z */
 /* > \verbatim */
 /* >          Z is REAL array, dimension (LDZ, N) */
 /* >          On entry, if COMPZ = 'V', the orthogonal matrix Z1. */
 /* >          On exit, if COMPZ='I', the orthogonal matrix Z, and if */
 /* >          COMPZ = 'V', the product Z1*Z. */
 /* >          Not referenced if COMPZ='N'. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >          The leading dimension of the array Z. */
 /* >          LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realOTHERcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  This routine reduces A to Hessenberg and B to triangular form by */
 /* >  an unblocked reduction, as described in _Matrix_Computations_, */
 /* >  by Golub and Van Loan (Johns Hopkins Press.) */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /*
  * sgghrd_ -- f2c translation of LAPACK SGGHRD: reduce the real pair (A,B),
  * with B upper triangular, to generalized upper Hessenberg form by Givens
  * rotations, optionally accumulating the rotations into Q and Z.
  *
  * Every argument is passed by address; matrices are column-major and are
  * addressed below through 1-based (row, column) accessor macros.
  * The function always returns 0; status is reported through *info
  * (0 on success, -k when the k-th argument is invalid).
  */
 /* Subroutine */ int sgghrd_(char *compq, char *compz, integer *n, integer *
 	ilo, integer *ihi, real *a, integer *lda, real *b, integer *ldb, real 
 	*q, integer *ldq, real *z__, integer *ldz, integer *info)
 {
    /* External routines */
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *,
 	    real *, real *, integer *);
    extern /* Subroutine */ int slartg_(real *, real *, real *, real *,
 	    real *);
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
 	    integer *, real *, real *);

    /* Leading dimensions, captured once on entry */
    const integer ld_a = *lda, ld_b = *ldb, ld_q = *ldq, ld_z = *ldz;

    /* 1-based (row, column) views of the column-major arrays */
 #define A_(r, c) a[((r) - 1) + ((c) - 1) * ld_a]
 #define B_(r, c) b[((r) - 1) + ((c) - 1) * ld_b]
 #define Q_(r, c) q[((r) - 1) + ((c) - 1) * ld_q]
 #define Z_(r, c) z__[((r) - 1) + ((c) - 1) * ld_z]

    /* Decoded job options: 0 = invalid, 1 = 'N', 2 = 'V', 3 = 'I' */
    integer q_mode = 0, z_mode = 0;
    /* Whether rotations are accumulated into Q / Z */
    logical upd_q = FALSE_, upd_z = FALSE_;

    integer col, row, order, last_col, first_row, cnt, argno;
    real cs, sn, pivot;

 /*     Decode COMPQ */

    if (lsame_(compq, "N")) {
 	q_mode = 1;
    } else if (lsame_(compq, "V")) {
 	q_mode = 2;
 	upd_q = TRUE_;
    } else if (lsame_(compq, "I")) {
 	q_mode = 3;
 	upd_q = TRUE_;
    }

 /*     Decode COMPZ */

    if (lsame_(compz, "N")) {
 	z_mode = 1;
    } else if (lsame_(compz, "V")) {
 	z_mode = 2;
 	upd_z = TRUE_;
    } else if (lsame_(compz, "I")) {
 	z_mode = 3;
 	upd_z = TRUE_;
    }

 /*     Validate the arguments; the first failing test decides INFO */

    *info = 0;
    if (q_mode <= 0) {
 	*info = -1;
    } else if (z_mode <= 0) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*ilo < 1) {
 	*info = -4;
    } else if (*ihi > *n || *ihi < *ilo - 1) {
 	*info = -5;
    } else if (*lda < f2cmax(1,*n)) {
 	*info = -7;
    } else if (*ldb < f2cmax(1,*n)) {
 	*info = -9;
    } else if ((upd_q && *ldq < *n) || *ldq < 1) {
 	*info = -11;
    } else if ((upd_z && *ldz < *n) || *ldz < 1) {
 	*info = -13;
    }
    if (*info != 0) {
 	argno = -(*info);
 	xerbla_("SGGHRD", &argno, (ftnlen)6);
 	return 0;
    }

 /*     Initialise Q and Z when they are to be built from scratch */

    if (q_mode == 3) {
 	slaset_("Full", n, n, &c_b10, &c_b11, q, ldq);
    }
    if (z_mode == 3) {
 	slaset_("Full", n, n, &c_b10, &c_b11, z__, ldz);
    }

 /*     Nothing to reduce for order 0 or 1 */

    order = *n;
    if (order <= 1) {
 	return 0;
    }

 /*     Clear the strictly lower triangle of B */

    for (col = 1; col < order; ++col) {
 	for (row = col + 1; row <= order; ++row) {
 	    B_(row, col) = 0.f;
 	}
    }

 /*     Reduce A and B: sweep the columns left to right and, within a */
 /*     column, annihilate entries from the bottom row upwards */

    last_col = *ihi - 2;
    for (col = *ilo; col <= last_col; ++col) {

 	first_row = col + 2;
 	for (row = *ihi; row >= first_row; --row) {

 /*           Step 1: rotate rows ROW-1, ROW to kill A(ROW,COL) */

 	    pivot = A_(row - 1, col);
 	    slartg_(&pivot, &A_(row, col), &cs, &sn, &A_(row - 1, col));
 	    A_(row, col) = 0.f;
 	    cnt = order - col;
 	    srot_(&cnt, &A_(row - 1, col + 1), lda, &A_(row, col + 1), lda,
 		    &cs, &sn);
 	    cnt = order + 2 - row;
 	    srot_(&cnt, &B_(row - 1, row - 1), ldb, &B_(row, row - 1), ldb,
 		    &cs, &sn);
 	    if (upd_q) {
 		srot_(n, &Q_(1, row - 1), &c__1, &Q_(1, row), &c__1, &cs,
 			&sn);
 	    }

 /*           Step 2: rotate columns ROW, ROW-1 to kill B(ROW,ROW-1) */

 	    pivot = B_(row, row);
 	    slartg_(&pivot, &B_(row, row - 1), &cs, &sn, &B_(row, row));
 	    B_(row, row - 1) = 0.f;
 	    srot_(ihi, &A_(1, row), &c__1, &A_(1, row - 1), &c__1, &cs, &sn);
 	    cnt = row - 1;
 	    srot_(&cnt, &B_(1, row), &c__1, &B_(1, row - 1), &c__1, &cs,
 		    &sn);
 	    if (upd_z) {
 		srot_(n, &Z_(1, row), &c__1, &Z_(1, row - 1), &c__1, &cs,
 			&sn);
 	    }
 	}
    }

    return 0;

 /*     End of SGGHRD */

 #undef A_
 #undef B_
 #undef Q_
 #undef Z_
 } /* sgghrd_ */

--- a/lapack-netlib/SRC/sgglse.c
+++ b/lapack-netlib/SRC/sgglse.c
@@ -0,0 +1,786 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar type aliases used throughout the f2c-translated sources. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers are stored as plain (real, imaginary) pairs. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd: build a C99 _Complex value from the r and i members of *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd: reinterpret the struct pointer as a pointer to the C99 complex
    type (NOTE(review): relies on the struct and _Complex layouts matching). */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf/pCd: lvalue view of *z as a C99 complex, usable as an assignment target. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Integer values used for the `logical` type. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 /* Defaults to plain `extern` unless the including file defined it first. */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer aliases used by the I/O descriptor structs below. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Each char* result field is followed by a length field for that buffer. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* One member per scalar type declared in this header. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of a single variable: name, address, dimensions and a type code. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of `nvars` variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Inline macro replacements for the Fortran intrinsic / runtime helpers that
    the translated code refers to.  Most take POINTERS to their operands and
    dereference them.  All are function-like macros, so arguments may be
    evaluated more than once. */

 /* NOTE(review): this redefines abs() after <stdlib.h> has been included. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Complex helpers: R (or c, p) is the destination pointer, written through pCf/pCd. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 /* NOTE(review): c_cos uses the double-precision ccos, unlike its csinf/cexpf siblings. */
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 /* NOTE(review): r_cnjg and r_imag go through the double-precision conj/cimag. */
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* d_int/r_int: truncate toward zero. */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* 0.4342944819... is the literal factor applied to the natural log here
    (presumably 1/ln(10), i.e. a base-10 logarithm, as the name suggests). */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* u_nint: round half away from zero.  u_sign: |a| carrying the sign of b. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Integer-exponent powers dispatch to the static *pow_ui helpers of this header. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): if B is a doublecomplex* like A, *(B) is a struct and cannot
    be passed to cpow; Cd(B) looks like what was intended - confirm before use. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* s_cat: concatenate *np source strings (pointers rpp[], lengths rnp[]) into
    lpp, never writing more than llp characters and blank-padding the rest. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: compares only the first min(c,d) characters. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copies at most min(C,D) characters and stops at a NUL in B; it does not pad A. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* Both ignore their arguments: sig_die exits with status 1, s_stop with status 0. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shorthands; only meaningful inside a loop body. */
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): the three macros below expand to a brace-enclosed block with
    no terminating semicolon, so they cannot be used as expressions as written.
    mymaxloc also always calls the double-precision dmaxloc_. */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning `logical`; the parameter list is
    left unspecified (variadic when compiled as C++). */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Single-precision x**n for an integer exponent, by binary exponentiation.
    Returns 1 for n == 0; for n < 0 the reciprocal 1/x is raised to |n|. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* Take |n| in unsigned arithmetic: negating the most
 			   negative integer as a signed value is undefined. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) result *= x;	/* this bit of the exponent is set */
 			if(u >>= 1) x *= x;	/* square the base for the next bit */
 			else break;
 		}
 	}
 	return result;
 }
 /* Double-precision x**n for an integer exponent, by binary exponentiation.
    Returns 1 for n == 0; for n < 0 the reciprocal 1/x is raised to |n|. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* Take |n| in unsigned arithmetic: negating the most
 			   negative integer as a signed value is undefined. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) result *= x;	/* this bit of the exponent is set */
 			if(u >>= 1) x *= x;	/* square the base for the next bit */
 			else break;
 		}
 	}
 	return result;
 }
 /* Single-precision complex x**n for an integer exponent, by binary
    exponentiation.  Returns 1 for n == 0; for n < 0 the reciprocal 1/x is
    raised to |n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* Take |n| in unsigned arithmetic: negating the most
 			   negative integer as a signed value is undefined. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) result *= x;	/* this bit of the exponent is set */
 			if(u >>= 1) x *= x;	/* square the base for the next bit */
 			else break;
 		}
 	}
 	return result;
 }
 /* Double-precision complex x**n for an integer exponent, by binary
    exponentiation.  Returns 1 for n == 0; for n < 0 the reciprocal 1/x is
    raised to |n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* Take |n| in unsigned arithmetic: negating the most
 			   negative integer as a signed value is undefined. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) result *= x;	/* this bit of the exponent is set */
 			if(u >>= 1) x *= x;	/* square the base for the next bit */
 			else break;
 		}
 	}
 	return result;
 }
 /* Integer x**n with integer result.
    n == 0 gives 1.  For n < 0 the result is 1 when x == 1, (+/-)1 when
    x == -1 (computed from |n|), 0 for any other non-zero x, and the
    expression 1/x is evaluated when x == 0 (i.e. an integer division by zero). */
 static integer pow_ii(integer x, integer n) {
 	integer acc;
 	unsigned long int bits;

 	if (n == 0)
 		return 1;
 	if (n < 0) {
 		if (x == 1)
 			return 1;
 		if (x != -1)
 			return x == 0 ? 1/x : 0;
 		/* x == -1: only the parity of |n| matters */
 		n = -n;
 	}

 	/* binary exponentiation over the bits of n */
 	acc = 1;
 	bits = n;
 	do {
 		if (bits & 01)
 			acc *= x;
 		bits >>= 1;
 		if (bits)
 			x *= x;
 	} while (bits);
 	return acc;
 }
 /* Position of the largest element of w(s..e) (1-based subscripts), counted
    from 1 relative to s.  On ties the first occurrence wins.  w(s) is read
    unconditionally; the argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	double top = w[s-1];

 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the largest element of w(s..e) (1-based subscripts), counted
    from 1 relative to s.  On ties the first occurrence wins.  w(s) is read
    unconditionally; the argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	float top = w[s-1];

 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum over i of conj(x_i) * y_i, for n elements.
    x and y are read with strides *incx_ and *incy_.  A negative stride walks
    the vector backwards starting from its far end (offset (1-n)*inc), so every
    access stays inside the caller's array.  n <= 0 yields zero. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = (n > 0 && incx < 0) ? (1 - n) * incx : 0;
 	integer iy = (n > 0 && incy < 0) ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product: *z = sum over i of conj(x_i) * y_i, for n elements.
    x and y are read with strides *incx_ and *incy_.  A negative stride walks
    the vector backwards starting from its far end (offset (1-n)*inc), so every
    access stays inside the caller's array.  n <= 0 yields zero. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = (n > 0 && incx < 0) ? (1 - n) * incx : 0;
 	integer iy = (n > 0 && incy < 0) ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over i of x_i * y_i, for n elements.
    x and y are read with strides *incx_ and *incy_.  A negative stride walks
    the vector backwards starting from its far end (offset (1-n)*inc), so every
    access stays inside the caller's array.  n <= 0 yields zero. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = (n > 0 && incx < 0) ? (1 - n) * incx : 0;
 	integer iy = (n > 0 && incy < 0) ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over i of x_i * y_i, for n elements.
    x and y are read with strides *incx_ and *incy_.  A negative stride walks
    the vector backwards starting from its far end (offset (1-n)*inc), so every
    access stays inside the caller's array.  n <= 0 yields zero. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = (n > 0 && incx < 0) ? (1 - n) * incx : 0;
 	integer iy = (n > 0 && incy < 0) ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* Constants kept in addressable storage because the called routines take
    every argument by pointer. */
 static integer c__1 = 1;	/* unit increment / single right-hand side / ISPEC for ilaenv_ */
 static integer c_n1 = -1;	/* placeholder for unused ilaenv_ arguments */
 static real c_b31 = -1.f;	/* scalar -1 passed to sgemv_ and saxpy_ */
 static real c_b33 = 1.f;	/* scalar +1 passed to sgemv_ */

 /* > \brief <b> SGGLSE solves overdetermined or underdetermined systems for OTHER matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGGLSE + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgglse.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgglse.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgglse.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, */
 /*                          INFO ) */

 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, P */
 /*       REAL               A( LDA, * ), B( LDB, * ), C( * ), D( * ), */
 /*      $                   WORK( * ), X( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGGLSE solves the linear equality-constrained least squares (LSE) */
 /* > problem: */
 /* > */
 /* >         minimize || c - A*x ||_2   subject to   B*x = d */
 /* > */
 /* > where A is an M-by-N matrix, B is a P-by-N matrix, c is a given */
 /* > M-vector, and d is a given P-vector. It is assumed that */
 /* > P <= N <= M+P, and */
 /* > */
 /* >          rank(B) = P and  rank( (A) ) = N. */
 /* >                               ( (B) ) */
 /* > */
 /* > These conditions ensure that the LSE problem has a unique solution, */
 /* > which is obtained using a generalized RQ factorization of the */
 /* > matrices (B, A) given by */
 /* > */
 /* >    B = (0 R)*Q,   A = Z*T*Q. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrices A and B. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] P */
 /* > \verbatim */
 /* >          P is INTEGER */
 /* >          The number of rows of the matrix B. 0 <= P <= N <= M+P. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the min(M,N)-by-N upper trapezoidal matrix T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= max(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,N) */
 /* >          On entry, the P-by-N matrix B. */
 /* >          On exit, the upper triangle of the subarray B(1:P,N-P+1:N) */
 /* >          contains the P-by-P upper triangular matrix R. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= max(1,P). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] C */
 /* > \verbatim */
 /* >          C is REAL array, dimension (M) */
 /* >          On entry, C contains the right hand side vector for the */
 /* >          least squares part of the LSE problem. */
 /* >          On exit, the residual sum of squares for the solution */
 /* >          is given by the sum of squares of elements N-P+1 to M of */
 /* >          vector C. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] D */
 /* > \verbatim */
 /* >          D is REAL array, dimension (P) */
 /* >          On entry, D contains the right hand side vector for the */
 /* >          constrained equation. */
 /* >          On exit, D is destroyed. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] X */
 /* > \verbatim */
 /* >          X is REAL array, dimension (N) */
 /* >          On exit, X is the solution of the LSE problem. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. LWORK >= max(1,M+N+P). */
 /* >          For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB, */
 /* >          where NB is an upper bound for the optimal blocksizes for */
 /* >          SGEQRF, SGERQF, SORMQR and SORMRQ. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          = 1:  the upper triangular factor R associated with B in the */
 /* >                generalized RQ factorization of the pair (B, A) is */
 /* >                singular, so that rank(B) < P; the least squares */
 /* >                solution could not be computed. */
 /* >          = 2:  the (N-P) by (N-P) part of the upper trapezoidal factor */
 /* >                T associated with A in the generalized RQ factorization */
 /* >                of the pair (B, A) is singular, so that */
 /* >                rank( (A) ) < N; the least squares solution could not */
 /* >                    ( (B) ) */
 /* >                be computed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realOTHERsolve */

 /*  ===================================================================== */
 /* Subroutine */ int sgglse_(integer *m, integer *n, integer *p, real *a, 
 	integer *lda, real *b, integer *ldb, real *c__, real *d__, real *x, 
 	real *work, integer *lwork, integer *info)
 {
     /* BLAS kernels */
     extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
 	    integer *);
     extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *,
 	    real *, integer *);
     extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *,
 	    real *, integer *, real *, integer *, real *, real *, integer *);
     extern /* Subroutine */ int strmv_(char *, char *, char *, integer *,
 	    real *, integer *, real *, integer *);

     /* LAPACK routines */
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
     extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
 	    integer *, integer *, ftnlen, ftnlen);
     extern /* Subroutine */ int sggrqf_(integer *, integer *, integer *,
 	    real *, integer *, real *, real *, integer *, real *, real *,
 	    integer *, integer *);
     extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *,
 	    integer *, real *, integer *, real *, real *, integer *, real *,
 	    integer *, integer *);
     extern /* Subroutine */ int sormrq_(char *, char *, integer *, integer *,
 	    integer *, real *, integer *, real *, real *, integer *, real *,
 	    integer *, integer *);
     extern /* Subroutine */ int strtrs_(char *, char *, char *, integer *,
 	    integer *, real *, integer *, real *, integer *, integer *);

     /* Locals */
     integer a_dim1, a_offset, b_dim1, b_offset;
     integer mn, nb, cand, lwkmin, lwkopt;
     integer lopt, used, lwsub, tail;
     integer ldc, cnt, ldrhs, extra, nr, neg_info;
     logical lquery;

     /* Shift every array pointer so that the Fortran-style 1-based
        subscripts of the reference routine can be used unchanged:
        element (i,j) of A is a[i + j * a_dim1], element i of C is c__[i]. */
     a_dim1 = *lda;
     a_offset = a_dim1 + 1;
     a -= a_offset;
     b_dim1 = *ldb;
     b_offset = b_dim1 + 1;
     b -= b_offset;
     --c__;
     --d__;
     --x;
     --work;

     /* ---- Argument checks ---- */

     *info = 0;
     mn = f2cmin(*m,*n);
     lquery = (*lwork == -1);

     if (*m < 0) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*p < 0 || *p > *n || *p < *n - *m) {
 	*info = -3;
     } else if (*lda < f2cmax(1,*m)) {
 	*info = -5;
     } else if (*ldb < f2cmax(1,*p)) {
 	*info = -7;
     }

     /* ---- Workspace sizes (minimum and blocked optimum) ---- */

     if (*info == 0) {
 	if (*n == 0) {
 	    lwkmin = 1;
 	    lwkopt = 1;
 	} else {
 	    /* nb = largest block size reported for the four kernels */
 	    nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6,
 		    (ftnlen)1);
 	    cand = ilaenv_(&c__1, "SGERQF", " ", m, n, &c_n1, &c_n1,
 		    (ftnlen)6, (ftnlen)1);
 	    if (cand > nb) {
 		nb = cand;
 	    }
 	    cand = ilaenv_(&c__1, "SORMQR", " ", m, n, p, &c_n1, (ftnlen)6,
 		    (ftnlen)1);
 	    if (cand > nb) {
 		nb = cand;
 	    }
 	    cand = ilaenv_(&c__1, "SORMRQ", " ", m, n, p, &c_n1, (ftnlen)6,
 		    (ftnlen)1);
 	    if (cand > nb) {
 		nb = cand;
 	    }
 	    lwkmin = *m + *n + *p;
 	    lwkopt = *p + mn + f2cmax(*m,*n) * nb;
 	}
 	work[1] = (real) lwkopt;

 	if (! lquery && *lwork < lwkmin) {
 	    *info = -12;
 	}
     }

     if (*info != 0) {
 	neg_info = -(*info);
 	xerbla_("SGGLSE", &neg_info, (ftnlen)6);
 	return 0;
     }

     /* Workspace query, or nothing to solve */
     if (lquery || *n == 0) {
 	return 0;
     }

     /* ---- Generalized RQ factorization of the pair (B, A) ----

        B*Q**T = (  0  T12 ) P      Z**T*A*Q**T = ( R11 R12 ) N-P
                   N-P  P                         (  0  R22 ) M+P-N
                                                    N-P  P

        with T12 and R11 upper triangular, Q and Z orthogonal.
        WORK(1:P) and WORK(P+1:P+MN) are handed to SGGRQF for the scalar
        factors; the remainder, starting at WORK(tail), is scratch space. */

     tail = *p + mn + 1;
     lwsub = *lwork - *p - mn;
     sggrqf_(p, m, n, &b[b_offset], ldb, &work[1], &a[a_offset], lda,
 	    &work[*p + 1], &work[tail], &lwsub, info);
     lopt = (integer) work[tail];

     /* ---- c := Z**T * c = ( c1 ) N-P
                             ( c2 ) M+P-N ---- */

     ldc = f2cmax(1,*m);
     lwsub = *lwork - *p - mn;
     sormqr_("Left", "Transpose", m, &c__1, &mn, &a[a_offset], lda,
 	    &work[*p + 1], &c__[1], &ldc, &work[tail], &lwsub, info);
     used = (integer) work[tail];
     if (used > lopt) {
 	lopt = used;
     }

     /* ---- Solve T12*x2 = d for x2 ---- */

     if (*p > 0) {
 	strtrs_("Upper", "No transpose", "Non-unit", p, &c__1,
 		&b[(*n - *p + 1) * b_dim1 + 1], ldb, &d__[1], p, info);

 	if (*info > 0) {
 	    /* T12 is singular */
 	    *info = 1;
 	    return 0;
 	}

 	/* x2 goes into the trailing P entries of X */
 	scopy_(p, &d__[1], &c__1, &x[*n - *p + 1], &c__1);

 	/* c1 := c1 - R12*x2 */
 	cnt = *n - *p;
 	sgemv_("No transpose", &cnt, p, &c_b31,
 		&a[(*n - *p + 1) * a_dim1 + 1], lda, &d__[1], &c__1, &c_b33,
 		&c__[1], &c__1);
     }

     /* ---- Solve R11*x1 = c1 for x1 ---- */

     if (*n > *p) {
 	cnt = *n - *p;
 	ldrhs = *n - *p;
 	strtrs_("Upper", "No transpose", "Non-unit", &cnt, &c__1,
 		&a[a_offset], lda, &c__[1], &ldrhs, info);

 	if (*info > 0) {
 	    /* R11 is singular */
 	    *info = 2;
 	    return 0;
 	}

 	/* x1 goes into the leading N-P entries of X */
 	cnt = *n - *p;
 	scopy_(&cnt, &c__[1], &c__1, &x[1], &c__1);
     }

     /* ---- Residual vector, left in C(N-P+1:...) ---- */

     nr = (*m < *n) ? *m + *p - *n : *p;
     if (*m < *n && nr > 0) {
 	extra = *n - *m;
 	sgemv_("No transpose", &nr, &extra, &c_b31,
 		&a[*n - *p + 1 + (*m + 1) * a_dim1], lda, &d__[nr + 1],
 		&c__1, &c_b33, &c__[*n - *p + 1], &c__1);
     }
     if (nr > 0) {
 	strmv_("Upper", "No transpose", "Non unit", &nr,
 		&a[*n - *p + 1 + (*n - *p + 1) * a_dim1], lda, &d__[1],
 		&c__1);
 	saxpy_(&nr, &c_b31, &d__[1], &c__1, &c__[*n - *p + 1], &c__1);
     }

     /* ---- Backward transformation x = Q**T*x ---- */

     lwsub = *lwork - *p - mn;
     sormrq_("Left", "Transpose", n, &c__1, p, &b[b_offset], ldb, &work[1],
 	    &x[1], n, &work[tail], &lwsub, info);

     /* Report the largest workspace any of the three calls asked for */
     used = (integer) work[tail];
     if (used < lopt) {
 	used = lopt;
     }
     work[1] = (real) (*p + mn + used);

     return 0;

 /*     End of SGGLSE */

 } /* sgglse_ */

--- a/lapack-netlib/SRC/sggqrf.c
+++ b/lapack-netlib/SRC/sggqrf.c
@@ -0,0 +1,718 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar type aliases used throughout the f2c-translated sources. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers are stored as plain (real, imaginary) pairs. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd: build a C99 _Complex value from the r and i members of *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd: reinterpret the struct pointer as a pointer to the C99 complex
    type (NOTE(review): relies on the struct and _Complex layouts matching). */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf/pCd: lvalue view of *z as a C99 complex, usable as an assignment target. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Integer values used for the `logical` type. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 /* Defaults to plain `extern` unless the including file defined it first. */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer aliases used by the I/O descriptor structs below. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Each char* result field is followed by a length field for that buffer. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* One member per scalar type declared in this header. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of a single variable: name, address, dimensions and a type code. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of `nvars` variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Inline macro replacements for the Fortran intrinsic / runtime helpers that
    the translated code refers to.  Most take POINTERS to their operands and
    dereference them.  All are function-like macros, so arguments may be
    evaluated more than once. */

 /* NOTE(review): this redefines abs() after <stdlib.h> has been included. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Complex helpers: R (or c, p) is the destination pointer, written through pCf/pCd. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 /* NOTE(review): c_cos uses the double-precision ccos, unlike its csinf/cexpf siblings. */
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 /* NOTE(review): r_cnjg and r_imag go through the double-precision conj/cimag. */
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* d_int/r_int: truncate toward zero. */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* 0.4342944819... is the literal factor applied to the natural log here
    (presumably 1/ln(10), i.e. a base-10 logarithm, as the name suggests). */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* u_nint: round half away from zero.  u_sign: |a| carrying the sign of b. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Integer-exponent powers dispatch to the static *pow_ui helpers of this header. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): if B is a doublecomplex* like A, *(B) is a struct and cannot
    be passed to cpow; Cd(B) looks like what was intended - confirm before use. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* s_cat: concatenate *np source strings (pointers rpp[], lengths rnp[]) into
    lpp, never writing more than llp characters and blank-padding the rest. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: compares only the first min(c,d) characters. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copies at most min(C,D) characters and stops at a NUL in B; it does not pad A. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* Both ignore their arguments: sig_die exits with status 1, s_stop with status 0. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shorthands; only meaningful inside a loop body. */
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): the three macros below expand to a brace-enclosed block with
    no terminating semicolon, so they cannot be used as expressions as written.
    mymaxloc also always calls the double-precision dmaxloc_. */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning `logical`; the parameter list is
    left unspecified (variadic when compiled as C++). */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Single-precision x**n for an integer exponent, by binary exponentiation.
    Returns 1 for n == 0; for n < 0 the reciprocal 1/x is raised to |n|. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* Take |n| in unsigned arithmetic: negating the most
 			   negative integer as a signed value is undefined. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) result *= x;	/* this bit of the exponent is set */
 			if(u >>= 1) x *= x;	/* square the base for the next bit */
 			else break;
 		}
 	}
 	return result;
 }
 /* Double-precision x**n for an integer exponent, by binary exponentiation.
    Returns 1 for n == 0; for n < 0 the reciprocal 1/x is raised to |n|. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* Take |n| in unsigned arithmetic: negating the most
 			   negative integer as a signed value is undefined. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) result *= x;	/* this bit of the exponent is set */
 			if(u >>= 1) x *= x;	/* square the base for the next bit */
 			else break;
 		}
 	}
 	return result;
 }
 /* Single-precision complex x**n for an integer exponent, by binary
    exponentiation.  Returns 1 for n == 0; for n < 0 the reciprocal 1/x is
    raised to |n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* Take |n| in unsigned arithmetic: negating the most
 			   negative integer as a signed value is undefined. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) result *= x;	/* this bit of the exponent is set */
 			if(u >>= 1) x *= x;	/* square the base for the next bit */
 			else break;
 		}
 	}
 	return result;
 }
 /* Double-precision complex x**n for an integer exponent, by binary
    exponentiation.  Returns 1 for n == 0; for n < 0 the reciprocal 1/x is
    raised to |n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* Take |n| in unsigned arithmetic: negating the most
 			   negative integer as a signed value is undefined. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) result *= x;	/* this bit of the exponent is set */
 			if(u >>= 1) x *= x;	/* square the base for the next bit */
 			else break;
 		}
 	}
 	return result;
 }
 /* Integer x**n with integer result.
    n == 0 gives 1.  For n < 0 the result is 1 when x == 1, (+/-)1 when
    x == -1 (computed from |n|), 0 for any other non-zero x, and the
    expression 1/x is evaluated when x == 0 (i.e. an integer division by zero). */
 static integer pow_ii(integer x, integer n) {
 	integer acc;
 	unsigned long int bits;

 	if (n == 0)
 		return 1;
 	if (n < 0) {
 		if (x == 1)
 			return 1;
 		if (x != -1)
 			return x == 0 ? 1/x : 0;
 		/* x == -1: only the parity of |n| matters */
 		n = -n;
 	}

 	/* binary exponentiation over the bits of n */
 	acc = 1;
 	bits = n;
 	do {
 		if (bits & 01)
 			acc *= x;
 		bits >>= 1;
 		if (bits)
 			x *= x;
 	} while (bits);
 	return acc;
 }
 /* Position of the largest element of w(s..e) (1-based subscripts), counted
    from 1 relative to s.  On ties the first occurrence wins.  w(s) is read
    unconditionally; the argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	double top = w[s-1];

 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /*
  * smaxloc_ -- position of the largest element of w(s..e) (1-based indices
  * into w), reported relative to s: the result is 1 when w(s) is the largest.
  * Ties keep the earliest position.  w(s) is read even when e < s, in which
  * case the result is 1.  The parameter n is not used.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k = s;
 	float top = w[s-1];

 	while (k < e) {
 		++k;
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /*
  * cdotc_ -- conjugated dot product of two single-precision complex vectors:
  *           *z = sum over i of conj(x(i)) * y(i).
  *
  * z:            receives the result (0 when *n_ <= 0).
  * n_:           number of elements.
  * x, incx_:     first vector and its stride.
  * y, incy_:     second vector and its stride.
  *
  * A negative stride walks its vector backwards starting from element
  * (1-n)*inc, the reference BLAS convention, instead of indexing in front of
  * the array.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float sum = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous fast path */
 		for (i=0;i<n;i++) {
 			sum += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			sum += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = sum;
 }
 /*
  * zdotc_ -- conjugated dot product of two double-precision complex vectors:
  *           *z = sum over i of conj(x(i)) * y(i).
  *
  * z:            receives the result (0 when *n_ <= 0).
  * n_:           number of elements.
  * x, incx_:     first vector and its stride.
  * y, incy_:     second vector and its stride.
  *
  * A negative stride walks its vector backwards starting from element
  * (1-n)*inc, the reference BLAS convention, instead of indexing in front of
  * the array.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double sum = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous fast path */
 		for (i=0;i<n;i++) {
 			sum += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			sum += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = sum;
 }
 /*
  * cdotu_ -- unconjugated dot product of two single-precision complex
  *           vectors: *z = sum over i of x(i) * y(i).
  *
  * z:            receives the result (0 when *n_ <= 0).
  * n_:           number of elements.
  * x, incx_:     first vector and its stride.
  * y, incy_:     second vector and its stride.
  *
  * A negative stride walks its vector backwards starting from element
  * (1-n)*inc, the reference BLAS convention, instead of indexing in front of
  * the array.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float sum = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous fast path */
 		for (i=0;i<n;i++) {
 			sum += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			sum += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = sum;
 }
 /*
  * zdotu_ -- unconjugated dot product of two double-precision complex
  *           vectors: *z = sum over i of x(i) * y(i).
  *
  * z:            receives the result (0 when *n_ <= 0).
  * n_:           number of elements.
  * x, incx_:     first vector and its stride.
  * y, incy_:     second vector and its stride.
  *
  * A negative stride walks its vector backwards starting from element
  * (1-n)*inc, the reference BLAS convention, instead of indexing in front of
  * the array.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double sum = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous fast path */
 		for (i=0;i<n;i++) {
 			sum += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			sum += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = sum;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* Addressable copies of 1 and -1: arguments are passed by pointer, so the
    ilaenv_ calls in sggqrf_ below take the address of these objects. */
 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b SGGQRF */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGGQRF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggqrf.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggqrf.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggqrf.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, */
 /*                          LWORK, INFO ) */

 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, P */
 /*       REAL               A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), */
 /*      $                   WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGGQRF computes a generalized QR factorization of an N-by-M matrix A */
 /* > and an N-by-P matrix B: */
 /* > */
 /* >             A = Q*R,        B = Q*T*Z, */
 /* > */
 /* > where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal */
 /* > matrix, and R and T assume one of the forms: */
 /* > */
 /* > if N >= M,  R = ( R11 ) M  ,   or if N < M,  R = ( R11  R12 ) N, */
 /* >                 (  0  ) N-M                         N   M-N */
 /* >                    M */
 /* > */
 /* > where R11 is upper triangular, and */
 /* > */
 /* > if N <= P,  T = ( 0  T12 ) N,   or if N > P,  T = ( T11 ) N-P, */
 /* >                  P-N  N                           ( T21 ) P */
 /* >                                                      P */
 /* > */
 /* > where T12 or T21 is upper triangular. */
 /* > */
 /* > In particular, if B is square and nonsingular, the GQR factorization */
 /* > of A and B implicitly gives the QR factorization of inv(B)*A: */
 /* > */
 /* >              inv(B)*A = Z**T*(inv(T)*R) */
 /* > */
 /* > where inv(B) denotes the inverse of the matrix B, and Z**T denotes the */
 /* > transpose of the matrix Z. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of rows of the matrices A and B. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of columns of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] P */
 /* > \verbatim */
 /* >          P is INTEGER */
 /* >          The number of columns of the matrix B.  P >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,M) */
 /* >          On entry, the N-by-M matrix A. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the f2cmin(N,M)-by-M upper trapezoidal matrix R (R is */
 /* >          upper triangular if N >= M); the elements below the diagonal, */
 /* >          with the array TAUA, represent the orthogonal matrix Q as a */
 /* >          product of f2cmin(N,M) elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUA */
 /* > \verbatim */
 /* >          TAUA is REAL array, dimension (f2cmin(N,M)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the orthogonal matrix Q (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,P) */
 /* >          On entry, the N-by-P matrix B. */
 /* >          On exit, if N <= P, the upper triangle of the subarray */
 /* >          B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */
 /* >          if N > P, the elements on and above the (N-P)-th subdiagonal */
 /* >          contain the N-by-P upper trapezoidal matrix T; the remaining */
 /* >          elements, with the array TAUB, represent the orthogonal */
 /* >          matrix Z as a product of elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUB */
 /* > \verbatim */
 /* >          TAUB is REAL array, dimension (f2cmin(N,P)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the orthogonal matrix Z (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. LWORK >= f2cmax(1,N,M,P). */
 /* >          For optimum performance LWORK >= f2cmax(N,M,P)*f2cmax(NB1,NB2,NB3), */
 /* >          where NB1 is the optimal blocksize for the QR factorization */
 /* >          of an N-by-M matrix, NB2 is the optimal blocksize for the */
 /* >          RQ factorization of an N-by-P matrix, and NB3 is the optimal */
 /* >          blocksize for a call of SORMQR. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realOTHERcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(n,m). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - taua * v * v**T */
 /* > */
 /* >  where taua is a real scalar, and v is a real vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), */
 /* >  and taua in TAUA(i). */
 /* >  To form Q explicitly, use LAPACK subroutine SORGQR. */
 /* >  To use Q to update another matrix, use LAPACK subroutine SORMQR. */
 /* > */
 /* >  The matrix Z is represented as a product of elementary reflectors */
 /* > */
 /* >     Z = H(1) H(2) . . . H(k), where k = f2cmin(n,p). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - taub * v * v**T */
 /* > */
 /* >  where taub is a real scalar, and v is a real vector with */
 /* >  v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in */
 /* >  B(n-k+i,1:p-k+i-1), and taub in TAUB(i). */
 /* >  To form Z explicitly, use LAPACK subroutine SORGRQ. */
 /* >  To use Z to update another matrix, use LAPACK subroutine SORMRQ. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* Subroutine */ int sggqrf_(integer *n, integer *m, integer *p, real *a, 
 	integer *lda, real *taua, real *b, integer *ldb, real *taub, real *
 	work, integer *lwork, integer *info)
 {
     /* External routines */
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
     extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
     extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
 	    *, real *, real *, integer *, integer *);
     extern /* Subroutine */ int sgerqf_(integer *, integer *, real *, integer 
 	    *, real *, real *, integer *, integer *);
     extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
 	    integer *, real *, integer *, real *, real *, integer *, real *, 
 	    integer *, integer *);

     /* Local variables */
     integer blk_qr, blk_rq, blk_mq, blk;	/* block sizes reported by ilaenv_ */
     integer big_dim;				/* largest of N, M, P */
     integer need;				/* smallest acceptable LWORK */
     integer best, used;			/* workspace figures returned in work[0] */
     integer nrefl;				/* number of reflectors: min(N, M) */
     integer bad_arg;				/* argument position handed to xerbla_ */
     logical query;

 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*     December 2016 */

     *info = 0;

 /*     Workspace estimate: largest dimension times largest block size. */
 /*     It is stored in work[0] before the arguments are checked. */

     blk_qr = ilaenv_(&c__1, "SGEQRF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);
     blk_rq = ilaenv_(&c__1, "SGERQF", " ", n, p, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);
     blk_mq = ilaenv_(&c__1, "SORMQR", " ", n, m, p, &c_n1, (ftnlen)6, (ftnlen)1);
     blk = f2cmax(blk_qr,blk_rq);
     blk = f2cmax(blk,blk_mq);
     big_dim = f2cmax(*n,*m);
     big_dim = f2cmax(big_dim,*p);
     work[0] = (real) (big_dim * blk);
     query = *lwork == -1;

 /*     Test the input parameters; -info is the position of the first */
 /*     offending argument. */

     if (*n < 0) {
 	*info = -1;
     } else if (*m < 0) {
 	*info = -2;
     } else if (*p < 0) {
 	*info = -3;
     } else if (*lda < f2cmax(1,*n)) {
 	*info = -5;
     } else if (*ldb < f2cmax(1,*n)) {
 	*info = -8;
     } else if (! query) {
 	/* LWORK is only validated for a real call, not a workspace query */
 	need = f2cmax(1,*n);
 	need = f2cmax(need,*m);
 	need = f2cmax(need,*p);
 	if (*lwork < need) {
 	    *info = -11;
 	}
     }

     if (*info != 0) {
 	bad_arg = -(*info);
 	xerbla_("SGGQRF", &bad_arg, (ftnlen)6);
 	return 0;
     }
     if (query) {
 	return 0;
     }

 /*     QR factorization of N-by-M matrix A: A = Q*R */

     sgeqrf_(n, m, a, lda, taua, work, lwork, info);
     best = (integer) work[0];

 /*     Update B := Q**T*B. */

     nrefl = f2cmin(*n,*m);
     sormqr_("Left", "Transpose", n, p, &nrefl, a, lda, taua, b, ldb, work,
 	    lwork, info);
     used = (integer) work[0];
     best = f2cmax(best,used);

 /*     RQ factorization of N-by-P matrix B: B = T*Z. */

     sgerqf_(n, p, b, ldb, taub, work, lwork, info);
     used = (integer) work[0];

 /*     Report the largest workspace figure returned by the three calls. */

     work[0] = (real) f2cmax(best,used);

     return 0;

 /*     End of SGGQRF */

 } /* sggqrf_ */

--- a/lapack-netlib/SRC/sggrqf.c
+++ b/lapack-netlib/SRC/sggrqf.c
@@ -0,0 +1,719 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* C spellings of the scalar types used by the translated code. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers as plain structs: r is the real part, i the imaginary part. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd: build a C99 complex value from the r and i members of *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd: reinterpret a struct pointer as a pointer to the C99 complex
    type.  NOTE(review): this relies on the struct and the C99 complex type
    sharing one memory layout -- confirm for the target compiler. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf(z) / pCd(z): the struct *z viewed as a C99 complex lvalue, so it can be
    the target of an assignment. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Truth-value and one-byte types. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Truth values for the logical types. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 /* Defaults to plain `extern` unless it has already been defined. */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer types used by the I/O descriptor structs; ftnlen is also the type
    of the string-length arguments of translated routines. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*
  * cilist -- control list for external read and write.
  * NOTE(review): the per-field notes are inferred from the field names only;
  * confirm against the libf2c documentation.
  */
 typedef struct
 {	flag cierr;	/* presumably an error-handling flag */
 	ftnint ciunit;	/* presumably the unit number */
 	flag ciend;	/* presumably an end-of-file handling flag */
 	char *cifmt;	/* presumably the format string */
 	ftnint cirec;	/* presumably a record number */
 } cilist;

 /*
  * icilist -- control list for internal read and write.
  * NOTE(review): the per-field notes are inferred from the field names only;
  * confirm against the libf2c documentation.
  */
 typedef struct
 {	flag icierr;	/* presumably an error-handling flag */
 	char *iciunit;	/* presumably the character buffer acting as the unit */
 	flag iciend;	/* presumably an end-of-file handling flag */
 	char *icifmt;	/* presumably the format string */
 	ftnint icirlen;	/* presumably the record length */
 	ftnint icirnum;	/* presumably the number of records */
 } icilist;

 /*
  * olist -- parameter list for the open operation.
  * NOTE(review): the per-field notes are inferred from the field names only;
  * confirm against the libf2c documentation.
  */
 typedef struct
 {	flag oerr;	/* presumably an error-handling flag */
 	ftnint ounit;	/* presumably the unit number */
 	char *ofnm;	/* presumably the file name ... */
 	ftnlen ofnmlen;	/* ... and its length */
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*
  * cllist -- parameter list for the close operation.
  * NOTE(review): the per-field notes are inferred from the field names only;
  * confirm against the libf2c documentation.
  */
 typedef struct
 {	flag cerr;	/* presumably an error-handling flag */
 	ftnint cunit;	/* presumably the unit number */
 	char *csta;
 } cllist;

 /*
  * alist -- parameter list shared by rewind, backspace and endfile.
  * NOTE(review): the per-field notes are inferred from the field names only;
  * confirm against the libf2c documentation.
  */
 typedef struct
 {	flag aerr;	/* presumably an error-handling flag */
 	ftnint aunit;	/* presumably the unit number */
 } alist;

 /*
  * inlist -- parameter list for the inquire operation.  Most members come in
  * pairs: a char* result buffer followed by its length (xxx / xxxlen).
  * NOTE(review): what each member reports is not shown in this file; confirm
  * against the libf2c documentation.  Note that informlen is declared ftnint
  * while the other length members are ftnlen (both are typedefs of int here).
  */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 /* VOID: alias for the keyword void. */
 #define VOID void

 /* Multitype: one storage cell able to hold a value of any of the scalar or
    complex types declared in this header. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Vardesc: description of one variable, referenced from Namelist.
    NOTE(review): the meaning of dims and type is not shown in this file;
    confirm against the libf2c documentation. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* Namelist: a named group of variables -- vars points to Vardesc pointers,
    with nvars presumably giving their count. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Generic arithmetic helpers.  These are plain macros: each argument may be
    evaluated more than once, so arguments must not have side effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit b of a: test it, clear it, or set it. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* abort_: expands to sig_die, which this header defines as exit(1). */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 /* Complex helpers taking pointers to the complex structs.  The block-style
    ones store their result through the first pointer argument. */
 #define c_abs(z) (cabsf(Cf(z)))
 /* NOTE(review): c_cos calls the double-precision ccos while its siblings
    call the float variants (cexpf, clogf, csinf) -- confirm this is intended. */
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 /* Real-valued helpers.  Arguments are pointers and are dereferenced by the
    macro; several arguments are evaluated more than once. */
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 /* Complex conjugate of *Z stored in *R.  r_cnjg goes through the
    double-precision conj() and converts the result on assignment. */
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* Positive difference: a - b when a > b, otherwise 0. */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 /* Imaginary part of the complex struct *z. */
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* Truncation toward zero, built from floor(). */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* Base-10 logarithm: natural log scaled by the constant 0.4342944819.... */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* Round to nearest, halves away from zero; u_nint takes a value, d_nint a pointer. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* Magnitude of a with the sign of b (b >= 0 counts as positive). */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 /* Integer-valued helpers. */
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 /* i_len ignores the string itself and yields the length argument n. */
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Power helpers.  Base and exponent are passed by pointer; the integer-power
    forms forward to the static *pow_ui functions defined later in this header. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 /* pow_si and pow_ri both map to the float routine spow_ui. */
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 /* Complex base, integer exponent: the result is stored in *p. */
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): pow_zz passes *(B) to cpow() directly, whereas A is wrapped
    in Cd(); if B points to a doublecomplex struct this would not compile --
    confirm the expected type of B at the call sites. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* s_cat: concatenate *np source strings rpp[i] (lengths rnp[i]) into the
    destination lpp of length llp.  Each piece is cut short once the
    destination is full; any space left over is filled with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the shorter of the two lengths c and d. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copy at most min(C,D) characters from B to A, stopping early at a
    NUL in B; the remainder of A is left untouched (no blank padding). */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* sig_die and s_stop ignore their arguments and terminate the process with
    exit status 1 and 0 respectively. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Version identification string; not referenced in the visible part of this file. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 /* Double-complex magnitude, exponential and square root; the block-style
    macros store their result in *R. */
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shims: expand to a bare `break;` / `continue;` statement. */
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): the three non-commented macros below expand to a
    brace-enclosed expression with no terminating semicolon, so they do not
    look usable as ordinary expressions or statements -- confirm whether any
    translated routine actually expands them. */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 /* Always forwards to the double-precision helper dmaxloc_, whatever the
    element type of w is. */
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning `logical`.  The parameter list is
    left open: `(...)` when compiled as C++, an empty `()` declarator when
    compiled as C. */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /*
  * spow_ui -- raise a float to an integer power by repeated squaring.
  *
  * x: base.  n: exponent; when negative, the reciprocal of x is raised to |n|.
  * Returns 1.0 when n == 0.
  *
  * |n| is formed in unsigned arithmetic, so the most negative exponent is
  * handled without negating a signed value (which would overflow).
  */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int u;

 	if (n == 0) return result;
 	if (n < 0) {
 		x = 1/x;
 		/* 0 - (unsigned long)n wraps around to |n| */
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current low bit set: fold in this power */
 		if (u >>= 1) x *= x;		/* more bits left: square for the next one */
 		else break;
 	}
 	return result;
 }
 /*
  * dpow_ui -- raise a double to an integer power by repeated squaring.
  *
  * x: base.  n: exponent; when negative, the reciprocal of x is raised to |n|.
  * Returns 1.0 when n == 0.
  *
  * |n| is formed in unsigned arithmetic, so the most negative exponent is
  * handled without negating a signed value (which would overflow).
  */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int u;

 	if (n == 0) return result;
 	if (n < 0) {
 		x = 1/x;
 		/* 0 - (unsigned long)n wraps around to |n| */
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current low bit set: fold in this power */
 		if (u >>= 1) x *= x;		/* more bits left: square for the next one */
 		else break;
 	}
 	return result;
 }
 /*
  * cpow_ui -- raise a single-precision complex value to an integer power by
  * repeated squaring.
  *
  * x: base.  n: exponent; when negative, the reciprocal of x is raised to |n|.
  * Returns 1.0 when n == 0.
  *
  * |n| is formed in unsigned arithmetic, so the most negative exponent is
  * handled without negating a signed value (which would overflow).
  */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int u;

 	if (n == 0) return result;
 	if (n < 0) {
 		x = 1/x;
 		/* 0 - (unsigned long)n wraps around to |n| */
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current low bit set: fold in this power */
 		if (u >>= 1) x *= x;		/* more bits left: square for the next one */
 		else break;
 	}
 	return result;
 }
 /*
  * zpow_ui -- raise a double-precision complex value to an integer power by
  * repeated squaring.
  *
  * x: base.  n: exponent; when negative, the reciprocal of x is raised to |n|.
  * Returns 1.0 when n == 0.
  *
  * |n| is formed in unsigned arithmetic, so the most negative exponent is
  * handled without negating a signed value (which would overflow).
  */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int u;

 	if (n == 0) return result;
 	if (n < 0) {
 		x = 1/x;
 		/* 0 - (unsigned long)n wraps around to |n| */
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current low bit set: fold in this power */
 		if (u >>= 1) x *= x;		/* more bits left: square for the next one */
 		else break;
 	}
 	return result;
 }
 /*
  * pow_ii -- integer power x**n with integer (truncating) semantics.
  *
  *   n == 0 or x == 1      -> 1
  *   n < 0,  x == -1       -> +1 or -1 according to the parity of |n|
  *   n < 0,  x == 0        -> evaluates 1/x, an integer division by zero
  *   n < 0,  other x       -> 0
  *   n > 0                 -> x**n by repeated squaring
  *
  * |n| is formed in unsigned arithmetic, so the most negative exponent is
  * handled without negating a signed value (which would overflow).
  */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int u;

 	if (n == 0 || x == 1) return 1;
 	if (n < 0) {
 		/* NOTE(review): the division by zero for x == 0 is kept exactly as
 		   in the original expression; presumably it is the intended way
 		   of signalling 0 ** negative -- confirm. */
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* 0 - (unsigned long)n wraps around to |n| */
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current low bit set: fold in this power */
 		if (u >>= 1) x *= x;		/* more bits left: square for the next one */
 		else break;
 	}
 	return result;
 }
 /*
  * dmaxloc_ -- position of the largest element of w(s..e) (1-based indices
  * into w), reported relative to s: the result is 1 when w(s) is the largest.
  * Ties keep the earliest position.  w(s) is read even when e < s, in which
  * case the result is 1.  The parameter n is not used.
  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k = s;
 	double top = w[s-1];

 	while (k < e) {
 		++k;
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /*
  * smaxloc_ -- position of the largest element of w(s..e) (1-based indices
  * into w), reported relative to s: the result is 1 when w(s) is the largest.
  * Ties keep the earliest position.  w(s) is read even when e < s, in which
  * case the result is 1.  The parameter n is not used.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k = s;
 	float top = w[s-1];

 	while (k < e) {
 		++k;
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /*
  * cdotc_ -- conjugated dot product of two single-precision complex vectors:
  *           *z = sum over i of conj(x(i)) * y(i).
  *
  * z:            receives the result (0 when *n_ <= 0).
  * n_:           number of elements.
  * x, incx_:     first vector and its stride.
  * y, incy_:     second vector and its stride.
  *
  * A negative stride walks its vector backwards starting from element
  * (1-n)*inc, the reference BLAS convention, instead of indexing in front of
  * the array.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float sum = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous fast path */
 		for (i=0;i<n;i++) {
 			sum += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			sum += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = sum;
 }
 /*
  * zdotc_ -- conjugated dot product of two double-precision complex vectors:
  *           *z = sum over i of conj(x(i)) * y(i).
  *
  * z:            receives the result (0 when *n_ <= 0).
  * n_:           number of elements.
  * x, incx_:     first vector and its stride.
  * y, incy_:     second vector and its stride.
  *
  * A negative stride walks its vector backwards starting from element
  * (1-n)*inc, the reference BLAS convention, instead of indexing in front of
  * the array.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double sum = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous fast path */
 		for (i=0;i<n;i++) {
 			sum += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			sum += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = sum;
 }
 /*
  * cdotu_ -- unconjugated dot product of two single-precision complex
  *           vectors: *z = sum over i of x(i) * y(i).
  *
  * z:            receives the result (0 when *n_ <= 0).
  * n_:           number of elements.
  * x, incx_:     first vector and its stride.
  * y, incy_:     second vector and its stride.
  *
  * A negative stride walks its vector backwards starting from element
  * (1-n)*inc, the reference BLAS convention, instead of indexing in front of
  * the array.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex float sum = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous fast path */
 		for (i=0;i<n;i++) {
 			sum += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			sum += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = sum;
 }
 /*
  * zdotu_ -- unconjugated dot product of two double-precision complex
  *           vectors: *z = sum over i of x(i) * y(i).
  *
  * z:            receives the result (0 when *n_ <= 0).
  * n_:           number of elements.
  * x, incx_:     first vector and its stride.
  * y, incy_:     second vector and its stride.
  *
  * A negative stride walks its vector backwards starting from element
  * (1-n)*inc, the reference BLAS convention, instead of indexing in front of
  * the array.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	_Complex double sum = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous fast path */
 		for (i=0;i<n;i++) {
 			sum += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		integer ix = incx < 0 ? (1 - n) * incx : 0;
 		integer iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			sum += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = sum;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* Addressable copies of 1 and -1; presumably handed by pointer to the
    routines called from sggrqf_ (its body lies outside the visible part). */
 static integer c__1 = 1;
 static integer c_n1 = -1;

 /* > \brief \b SGGRQF */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGGRQF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggrqf.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggrqf.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggrqf.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, */
 /*                          LWORK, INFO ) */

 /*       INTEGER            INFO, LDA, LDB, LWORK, M, N, P */
 /*       REAL               A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), */
 /*      $                   WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGGRQF computes a generalized RQ factorization of an M-by-N matrix A */
 /* > and a P-by-N matrix B: */
 /* > */
 /* >             A = R*Q,        B = Z*T*Q, */
 /* > */
 /* > where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal */
 /* > matrix, and R and T assume one of the forms: */
 /* > */
 /* > if M <= N,  R = ( 0  R12 ) M,   or if M > N,  R = ( R11 ) M-N, */
 /* >                  N-M  M                           ( R21 ) N */
 /* >                                                      N */
 /* > */
 /* > where R12 or R21 is upper triangular, and */
 /* > */
 /* > if P >= N,  T = ( T11 ) N  ,   or if P < N,  T = ( T11  T12 ) P, */
 /* >                 (  0  ) P-N                         P   N-P */
 /* >                    N */
 /* > */
 /* > where T11 is upper triangular. */
 /* > */
 /* > In particular, if B is square and nonsingular, the GRQ factorization */
 /* > of A and B implicitly gives the RQ factorization of A*inv(B): */
 /* > */
 /* >              A*inv(B) = (R*inv(T))*Z**T */
 /* > */
 /* > where inv(B) denotes the inverse of the matrix B, and Z**T denotes the */
 /* > transpose of the matrix Z. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] P */
 /* > \verbatim */
 /* >          P is INTEGER */
 /* >          The number of rows of the matrix B.  P >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrices A and B. N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, if M <= N, the upper triangle of the subarray */
 /* >          A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; */
 /* >          if M > N, the elements on and above the (M-N)-th subdiagonal */
 /* >          contain the M-by-N upper trapezoidal matrix R; the remaining */
 /* >          elements, with the array TAUA, represent the orthogonal */
 /* >          matrix Q as a product of elementary reflectors (see Further */
 /* >          Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUA */
 /* > \verbatim */
 /* >          TAUA is REAL array, dimension (f2cmin(M,N)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the orthogonal matrix Q (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,N) */
 /* >          On entry, the P-by-N matrix B. */
 /* >          On exit, the elements on and above the diagonal of the array */
 /* >          contain the f2cmin(P,N)-by-N upper trapezoidal matrix T (T is */
 /* >          upper triangular if P >= N); the elements below the diagonal, */
 /* >          with the array TAUB, represent the orthogonal matrix Z as a */
 /* >          product of elementary reflectors (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= f2cmax(1,P). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] TAUB */
 /* > \verbatim */
 /* >          TAUB is REAL array, dimension (f2cmin(P,N)) */
 /* >          The scalar factors of the elementary reflectors which */
 /* >          represent the orthogonal matrix Z (see Further Details). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. LWORK >= f2cmax(1,N,M,P). */
 /* >          For optimum performance LWORK >= f2cmax(N,M,P)*f2cmax(NB1,NB2,NB3), */
 /* >          where NB1 is the optimal blocksize for the RQ factorization */
 /* >          of an M-by-N matrix, NB2 is the optimal blocksize for the */
 /* >          QR factorization of a P-by-N matrix, and NB3 is the optimal */
 /* >          blocksize for a call of SORMRQ. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realOTHERcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  The matrix Q is represented as a product of elementary reflectors */
 /* > */
 /* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - taua * v * v**T */
 /* > */
 /* >  where taua is a real scalar, and v is a real vector with */
 /* >  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */
 /* >  A(m-k+i,1:n-k+i-1), and taua in TAUA(i). */
 /* >  To form Q explicitly, use LAPACK subroutine SORGRQ. */
 /* >  To use Q to update another matrix, use LAPACK subroutine SORMRQ. */
 /* > */
 /* >  The matrix Z is represented as a product of elementary reflectors */
 /* > */
 /* >     Z = H(1) H(2) . . . H(k), where k = f2cmin(p,n). */
 /* > */
 /* >  Each H(i) has the form */
 /* > */
 /* >     H(i) = I - taub * v * v**T */
 /* > */
 /* >  where taub is a real scalar, and v is a real vector with */
 /* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), */
 /* >  and taub in TAUB(i). */
 /* >  To form Z explicitly, use LAPACK subroutine SORGQR. */
 /* >  To use Z to update another matrix, use LAPACK subroutine SORMQR. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* SGGRQF: generalized RQ factorization of an M-by-N matrix A and a */
 /* P-by-N matrix B, carried out in three steps:                      */
 /*   1. RQ factorization of A (SGERQF), reflectors stored in A/TAUA; */
 /*   2. B := B*Q**T using those reflectors (SORMRQ);                 */
 /*   3. QR factorization of the updated B (SGEQRF), scalars in TAUB. */
 /*                                                                   */
 /* m, p, n    row count of A, row count of B, shared column count.   */
 /* a, lda     matrix A and its leading dimension (>= max(1,M)).      */
 /* b, ldb     matrix B and its leading dimension (>= max(1,P)).      */
 /* taua, taub scalar factors of the elementary reflectors.           */
 /* work       workspace; work[0] receives the workspace size on exit.*/
 /* lwork      length of work, >= max(1,N,M,P); -1 requests a         */
 /*            workspace query only.                                  */
 /* info       0 on success, -i if the i-th argument is illegal.      */
 /*                                                                   */
 /* Always returns 0; status is reported through *info.               */
 /* Subroutine */ int sggrqf_(integer *m, integer *p, integer *n, real *a, 
 	integer *lda, real *taua, real *b, integer *ldb, real *taub, real *
 	work, integer *lwork, integer *info)
 {
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
 	    *, real *, real *, integer *, integer *), sgerqf_(integer *, 
 	    integer *, real *, integer *, real *, real *, integer *, integer *
 	    );
    extern /* Subroutine */ int sormrq_(char *, char *, integer *, integer *, 
 	    integer *, real *, integer *, real *, real *, integer *, real *, 
 	    integer *, integer *);

    integer nb1, nb2, nb3, nb;
    integer maxdim, lwkmin, lwkopt, lopt, used;
    integer nrefl, row0, neg_info;
    logical lquery;

    *info = 0;

    /* Block sizes of the three kernels; the largest one drives the */
    /* recommended workspace size.                                  */
    nb1 = ilaenv_(&c__1, "SGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);
    nb2 = ilaenv_(&c__1, "SGEQRF", " ", p, n, &c_n1, &c_n1, (ftnlen)6, (
 	    ftnlen)1);
    nb3 = ilaenv_(&c__1, "SORMRQ", " ", m, n, p, &c_n1, (ftnlen)6, (ftnlen)1);
    nb = f2cmax(nb1,nb2);
    nb = f2cmax(nb,nb3);

    maxdim = f2cmax(*n,*m);
    maxdim = f2cmax(maxdim,*p);
    lwkopt = maxdim * nb;
    /* Never report less than the documented minimum LWORK >= 1;     */
    /* without this clamp an all-zero problem would advertise size 0. */
    lwkopt = f2cmax(1,lwkopt);
    work[0] = (real) lwkopt;
    lquery = *lwork == -1;

    /* Argument checks: the first offending argument determines INFO. */
    if (*m < 0) {
 	*info = -1;
    } else if (*p < 0) {
 	*info = -2;
    } else if (*n < 0) {
 	*info = -3;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -5;
    } else if (*ldb < f2cmax(1,*p)) {
 	*info = -8;
    } else {
 	lwkmin = f2cmax(1,maxdim);
 	if (! lquery && *lwork < lwkmin) {
 	    *info = -11;
 	}
    }

    if (*info != 0) {
 	neg_info = -(*info);
 	xerbla_("SGGRQF", &neg_info, (ftnlen)6);
 	return 0;
    }
    if (lquery) {
 	return 0;
    }

 /*     RQ factorization of M-by-N matrix A: A = R*Q */

    sgerqf_(m, n, a, lda, taua, work, lwork, info);
    lopt = (integer) work[0];

 /*     Update B := B*Q**T */

    /* The reflectors start in row max(1, M-N+1) of A (1-based), column 1. */
    nrefl = f2cmin(*m,*n);
    row0 = *m - *n + 1;
    row0 = f2cmax(1,row0);
    sormrq_("Right", "Transpose", p, n, &nrefl, a + (row0 - 1), 
 	    lda, taua, b, ldb, work, lwork, info);
    used = (integer) work[0];
    lopt = f2cmax(lopt,used);

 /*     QR factorization of P-by-N matrix B: B = Z*T */

    sgeqrf_(p, n, b, ldb, taub, work, lwork, info);
    used = (integer) work[0];
    work[0] = (real) f2cmax(lopt,used);

    return 0;

 /*     End of SGGRQF */

 } /* sggrqf_ */

--- a/lapack-netlib/SRC/sggsvd3.c
+++ b/lapack-netlib/SRC/sggsvd3.c
@@ -0,0 +1,936 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Fortran scalar types expressed as plain C typedefs. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex values are stored as {real part, imaginary part} pairs. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd build a C99 _Complex value from the struct pointed to by z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd reinterpret the struct pointer as a pointer to a C99 complex; */
 /* NOTE(review): this relies on both representations sharing one layout.  */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf(z)/pCd(z) are the lvalue forms: they dereference the cast pointer, */
 /* so they may appear on the left-hand side of an assignment.             */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* LOGICAL and the narrow integer/logical variants. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Values used for logical true and false. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 /* Defaults to plain "extern" unless the includer defined it beforehand. */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */
 /* Parameter blocks for Fortran-style I/O statements, one struct per      */
 /* statement family (named in the comment above each struct).             */
 /* NOTE(review): the LAPACK routine that follows in this file does not    */
 /* reference any of them; presumably they are kept only because the f2c.h */
 /* prologue is pasted verbatim - confirm before removing.                 */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Each char* member is paired with an ftnlen/ftnint member holding a */
 /* length; the ftnint* members are pointers to integer values.        */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 /* Alias for the C keyword void. */
 #define VOID void

 /* One member per scalar type, overlaid in the same storage. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Describes one variable: its name, address, dimension data and a type code. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Generic arithmetic helpers.  All of them are function-like macros that */
 /* evaluate their arguments more than once, so arguments must be free of  */
 /* side effects.  Note that abs() is redefined here as a macro although   */
 /* <stdlib.h> has already been included above.                            */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit helpers: test, clear or set bit number b of a. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro stand-ins for run-time helper routines.  Most take POINTER      */
 /* arguments and dereference them in the expansion; the brace-enclosed   */
 /* ones are statements and cannot be used inside an expression.          */

 /* abort_ expands to sig_die, which (see below) ignores its arguments and */
 /* calls exit(1).                                                         */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 /* NOTE(review): c_cos calls the double-precision ccos on a single-       */
 /* precision argument, unlike c_exp/c_log/c_sin which use the f-suffixed  */
 /* functions.                                                             */
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 /* NOTE(review): r_cnjg and r_imag use the double-precision conj/cimag on */
 /* single-precision values.                                               */
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* Positive difference: a-b when a > b, otherwise 0. */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* Truncation toward zero, built from floor(). */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* Base-10 logarithm: natural log scaled by a constant factor. */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* Round to nearest, halves away from zero; u_nint takes a value, not a pointer. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* |a| carrying the sign of b; u_sign takes values, the others pointers. */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Powers; the *_ui helpers named in the expansions are expected to be */
 /* defined in the same translation unit.                               */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): pow_zz passes *(B) straight to cpow; if B is a           */
 /* doublecomplex pointer like A, Cd(B) was probably intended - confirm.   */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* s_cat: concatenates *np source strings rpp[i] (lengths rnp[i]) into    */
 /* lpp, never writing more than llp characters, then fills the rest of    */
 /* lpp with blanks.                                                       */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the shorter of the two lengths. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copies at most min(C,D) characters and stops early at a NUL in */
 /* B; the remainder of A is left untouched (no blank padding).            */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* Both arguments are ignored by sig_die and s_stop. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Version string; not referenced by any code visible in this file section. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control helpers: expand to break / continue statements. */
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): the next three expand to a brace-enclosed expression     */
 /* without a terminating semicolon, so they are valid only in limited     */
 /* contexts (e.g. as an initializer) - confirm how callers use them.      */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning logical.  The parameter list is */
 /* left unspecified: "(...)" when compiled as C++, "()" when compiled as C. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Single-precision x raised to the integer power n, by binary            */
 /* exponentiation (square-and-multiply).  A negative n yields (1/x)**|n|; */
 /* n == 0 yields 1.                                                       */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 1UL)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;	/* square only while exponent bits remain */
 	}
 	return result;
 }
 /* Double-precision x raised to the integer power n, by binary            */
 /* exponentiation (square-and-multiply).  A negative n yields (1/x)**|n|; */
 /* n == 0 yields 1.                                                       */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 1UL)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;	/* square only while exponent bits remain */
 	}
 	return result;
 }
 /* Single-precision complex x raised to the integer power n, by binary    */
 /* exponentiation (square-and-multiply).  A negative n yields (1/x)**|n|; */
 /* n == 0 yields 1.                                                       */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 1UL)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;	/* square only while exponent bits remain */
 	}
 	return result;
 }
 /* Double-precision complex x raised to the integer power n, by binary    */
 /* exponentiation (square-and-multiply).  A negative n yields (1/x)**|n|; */
 /* n == 0 yields 1.                                                       */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 1UL)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;	/* square only while exponent bits remain */
 	}
 	return result;
 }
 /* Integer x raised to the integer power n.                               */
 /*   n == 0 or x == 1 : result is 1.                                      */
 /*   n < 0, x == -1   : result is +1 or -1 depending on the parity of n.  */
 /*   n < 0, x == 0    : evaluates 1/x, i.e. an integer division by zero   */
 /*                      (NOTE(review): looks deliberate, to fault on      */
 /*                      0**negative - confirm).                           */
 /*   n < 0, otherwise : result is 0 (the exact value truncates to zero).  */
 /*   n > 0            : binary exponentiation.                            */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int bits;

 	if (n == 0 || x == 1)
 		return 1;
 	if (n < 0) {
 		if (x != -1)
 			return x == 0 ? 1/x : 0;
 		/* x == -1: take |n| in unsigned arithmetic, because negating
 		   the most negative integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int) n;
 	} else {
 		bits = (unsigned long int) n;
 	}
 	for (;;) {
 		if (bits & 1UL)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;	/* square only while exponent bits remain */
 	}
 	return result;
 }
 /* Position of the largest element among w(s)..w(e) (1-based, inclusive), */
 /* returned relative to s, so the answer is 1 when w(s) itself is the     */
 /* maximum.  Ties keep the earliest position.  The argument n is unused.  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer pos = s + 1;
 	double top = w[s-1];

 	while (pos <= e) {
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 		++pos;
 	}
 	return best - s + 1;
 }
 /* Position of the largest element among w(s)..w(e) (1-based, inclusive), */
 /* returned relative to s, so the answer is 1 when w(s) itself is the     */
 /* maximum.  Ties keep the earliest position.  The argument n is unused.  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer pos = s + 1;
 	float top = w[s-1];

 	while (pos <= e) {
 		if (w[pos-1] > top) {
 			best = pos;
 			top = w[pos-1];
 		}
 		++pos;
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum over i of conj(x(i)) * y(i), for      */
 /* single-precision complex vectors of *n_ elements with strides *incx_   */
 /* and *incy_.  A non-positive n gives 0.  With a negative stride the     */
 /* traversal starts at element (1-n)*inc, following the BLAS convention,  */
 /* so that no element before the start of the vector is touched.          */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_;
 	integer i, ix, iy;
 	_Complex float acc = 0.0;

 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++) { /* acc = acc + conjg(x(i)) * y(i) */
 			acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product: *z = sum over i of conj(x(i)) * y(i), for      */
 /* double-precision complex vectors of *n_ elements with strides *incx_   */
 /* and *incy_.  A non-positive n gives 0.  With a negative stride the     */
 /* traversal starts at element (1-n)*inc, following the BLAS convention,  */
 /* so that no element before the start of the vector is touched.          */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_;
 	integer i, ix, iy;
 	_Complex double acc = 0.0;

 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++) { /* acc = acc + dconjg(x(i)) * y(i) */
 			acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over i of x(i) * y(i), for          */
 /* single-precision complex vectors of *n_ elements with strides *incx_   */
 /* and *incy_.  A non-positive n gives 0.  With a negative stride the     */
 /* traversal starts at element (1-n)*inc, following the BLAS convention,  */
 /* so that no element before the start of the vector is touched.          */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_;
 	integer i, ix, iy;
 	_Complex float acc = 0.0;

 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++) { /* acc = acc + x(i) * y(i), no conjugation */
 			acc += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over i of x(i) * y(i), for          */
 /* double-precision complex vectors of *n_ elements with strides *incx_   */
 /* and *incy_.  A non-positive n gives 0.  With a negative stride the     */
 /* traversal starts at element (1-n)*inc, following the BLAS convention,  */
 /* so that no element before the start of the vector is touched.          */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_;
 	integer i, ix, iy;
 	_Complex double acc = 0.0;

 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++) { /* acc = acc + x(i) * y(i), no conjugation */
 			acc += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */
 /* Integer constants kept in addressable storage because the routines    */
 /* called below take every argument by pointer: c_n1 is passed as the    */
 /* workspace-query length (-1) and c__1 as the unit vector increment.    */

 static integer c_n1 = -1;
 static integer c__1 = 1;

 /* > \brief <b> SGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices</b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGGSVD3 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggsvd3.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggsvd3.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggsvd3.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGGSVD3( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, */
 /*                           LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, */
 /*                           LWORK, IWORK, INFO ) */

 /*       CHARACTER          JOBQ, JOBU, JOBV */
 /*       INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK */
 /*       INTEGER            IWORK( * ) */
 /*       REAL               A( LDA, * ), ALPHA( * ), B( LDB, * ), */
 /*      $                   BETA( * ), Q( LDQ, * ), U( LDU, * ), */
 /*      $                   V( LDV, * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGGSVD3 computes the generalized singular value decomposition (GSVD) */
 /* > of an M-by-N real matrix A and P-by-N real matrix B: */
 /* > */
 /* >       U**T*A*Q = D1*( 0 R ),    V**T*B*Q = D2*( 0 R ) */
 /* > */
 /* > where U, V and Q are orthogonal matrices. */
 /* > Let K+L = the effective numerical rank of the matrix (A**T,B**T)**T, */
 /* > then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and */
 /* > D2 are M-by-(K+L) and P-by-(K+L) "diagonal" matrices and of the */
 /* > following structures, respectively: */
 /* > */
 /* > If M-K-L >= 0, */
 /* > */
 /* >                     K  L */
 /* >        D1 =     K ( I  0 ) */
 /* >                 L ( 0  C ) */
 /* >             M-K-L ( 0  0 ) */
 /* > */
 /* >                   K  L */
 /* >        D2 =   L ( 0  S ) */
 /* >             P-L ( 0  0 ) */
 /* > */
 /* >                 N-K-L  K    L */
 /* >   ( 0 R ) = K (  0   R11  R12 ) */
 /* >             L (  0    0   R22 ) */
 /* > */
 /* > where */
 /* > */
 /* >   C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), */
 /* >   S = diag( BETA(K+1),  ... , BETA(K+L) ), */
 /* >   C**2 + S**2 = I. */
 /* > */
 /* >   R is stored in A(1:K+L,N-K-L+1:N) on exit. */
 /* > */
 /* > If M-K-L < 0, */
 /* > */
 /* >                   K M-K K+L-M */
 /* >        D1 =   K ( I  0    0   ) */
 /* >             M-K ( 0  C    0   ) */
 /* > */
 /* >                     K M-K K+L-M */
 /* >        D2 =   M-K ( 0  S    0  ) */
 /* >             K+L-M ( 0  0    I  ) */
 /* >               P-L ( 0  0    0  ) */
 /* > */
 /* >                    N-K-L  K   M-K  K+L-M */
 /* >   ( 0 R ) =     K ( 0    R11  R12  R13  ) */
 /* >               M-K ( 0     0   R22  R23  ) */
 /* >             K+L-M ( 0     0    0   R33  ) */
 /* > */
 /* > where */
 /* > */
 /* >   C = diag( ALPHA(K+1), ... , ALPHA(M) ), */
 /* >   S = diag( BETA(K+1),  ... , BETA(M) ), */
 /* >   C**2 + S**2 = I. */
 /* > */
 /* >   (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored */
 /* >   ( 0  R22 R23 ) */
 /* >   in B(M-K+1:L,N+M-K-L+1:N) on exit. */
 /* > */
 /* > The routine computes C, S, R, and optionally the orthogonal */
 /* > transformation matrices U, V and Q. */
 /* > */
 /* > In particular, if B is an N-by-N nonsingular matrix, then the GSVD of */
 /* > A and B implicitly gives the SVD of A*inv(B): */
 /* >                      A*inv(B) = U*(D1*inv(D2))*V**T. */
 /* > If ( A**T,B**T)**T  has orthonormal columns, then the GSVD of A and B is */
 /* > also equal to the CS decomposition of A and B. Furthermore, the GSVD */
 /* > can be used to derive the solution of the eigenvalue problem: */
 /* >                      A**T*A x = lambda* B**T*B x. */
 /* > In some literature, the GSVD of A and B is presented in the form */
 /* >                  U**T*A*X = ( 0 D1 ),   V**T*B*X = ( 0 D2 ) */
 /* > where U and V are orthogonal and X is nonsingular, D1 and D2 are */
 /* > ``diagonal''.  The former GSVD form can be converted to the latter */
 /* > form by taking the nonsingular matrix X as */
 /* > */
 /* >                      X = Q*( I   0    ) */
 /* >                            ( 0 inv(R) ). */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOBU */
 /* > \verbatim */
 /* >          JOBU is CHARACTER*1 */
 /* >          = 'U':  Orthogonal matrix U is computed; */
 /* >          = 'N':  U is not computed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JOBV */
 /* > \verbatim */
 /* >          JOBV is CHARACTER*1 */
 /* >          = 'V':  Orthogonal matrix V is computed; */
 /* >          = 'N':  V is not computed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] JOBQ */
 /* > \verbatim */
 /* >          JOBQ is CHARACTER*1 */
 /* >          = 'Q':  Orthogonal matrix Q is computed; */
 /* >          = 'N':  Q is not computed. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of rows of the matrix A.  M >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The number of columns of the matrices A and B.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] P */
 /* > \verbatim */
 /* >          P is INTEGER */
 /* >          The number of rows of the matrix B.  P >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] K */
 /* > \verbatim */
 /* >          K is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[out] L */
 /* > \verbatim */
 /* >          L is INTEGER */
 /* > */
 /* >          On exit, K and L specify the dimension of the subblocks */
 /* >          described in Purpose. */
 /* >          K + L = effective numerical rank of (A**T,B**T)**T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >          On entry, the M-by-N matrix A. */
 /* >          On exit, A contains the triangular matrix R, or part of R. */
 /* >          See Purpose for details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >          The leading dimension of the array A. LDA >= f2cmax(1,M). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,N) */
 /* >          On entry, the P-by-N matrix B. */
 /* >          On exit, B contains the triangular matrix R if M-K-L < 0. */
 /* >          See Purpose for details. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B. LDB >= f2cmax(1,P). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] ALPHA */
 /* > \verbatim */
 /* >          ALPHA is REAL array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] BETA */
 /* > \verbatim */
 /* >          BETA is REAL array, dimension (N) */
 /* > */
 /* >          On exit, ALPHA and BETA contain the generalized singular */
 /* >          value pairs of A and B; */
 /* >            ALPHA(1:K) = 1, */
 /* >            BETA(1:K)  = 0, */
 /* >          and if M-K-L >= 0, */
 /* >            ALPHA(K+1:K+L) = C, */
 /* >            BETA(K+1:K+L)  = S, */
 /* >          or if M-K-L < 0, */
 /* >            ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 */
 /* >            BETA(K+1:M) =S, BETA(M+1:K+L) =1 */
 /* >          and */
 /* >            ALPHA(K+L+1:N) = 0 */
 /* >            BETA(K+L+1:N)  = 0 */
 /* > \endverbatim */
 /* > */
 /* > \param[out] U */
 /* > \verbatim */
 /* >          U is REAL array, dimension (LDU,M) */
 /* >          If JOBU = 'U', U contains the M-by-M orthogonal matrix U. */
 /* >          If JOBU = 'N', U is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDU */
 /* > \verbatim */
 /* >          LDU is INTEGER */
 /* >          The leading dimension of the array U. LDU >= f2cmax(1,M) if */
 /* >          JOBU = 'U'; LDU >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] V */
 /* > \verbatim */
 /* >          V is REAL array, dimension (LDV,P) */
 /* >          If JOBV = 'V', V contains the P-by-P orthogonal matrix V. */
 /* >          If JOBV = 'N', V is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDV */
 /* > \verbatim */
 /* >          LDV is INTEGER */
 /* >          The leading dimension of the array V. LDV >= f2cmax(1,P) if */
 /* >          JOBV = 'V'; LDV >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] Q */
 /* > \verbatim */
 /* >          Q is REAL array, dimension (LDQ,N) */
 /* >          If JOBQ = 'Q', Q contains the N-by-N orthogonal matrix Q. */
 /* >          If JOBQ = 'N', Q is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDQ */
 /* > \verbatim */
 /* >          LDQ is INTEGER */
 /* >          The leading dimension of the array Q. LDQ >= f2cmax(1,N) if */
 /* >          JOBQ = 'Q'; LDQ >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
 /* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >          The dimension of the array WORK. */
 /* > */
 /* >          If LWORK = -1, then a workspace query is assumed; the routine */
 /* >          only calculates the optimal size of the WORK array, returns */
 /* >          this value as the first entry of the WORK array, and no error */
 /* >          message related to LWORK is issued by XERBLA. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (N) */
 /* >          On exit, IWORK stores the sorting information. More */
 /* >          precisely, the following loop will sort ALPHA */
 /* >             for I = K+1, f2cmin(M,K+L) */
 /* >                 swap ALPHA(I) and ALPHA(IWORK(I)) */
 /* >             endfor */
 /* >          such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit. */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
 /* >          > 0:  if INFO = 1, the Jacobi-type procedure failed to */
 /* >                converge.  For further details, see subroutine STGSJA. */
 /* > \endverbatim */

 /* > \par Internal Parameters: */
 /*  ========================= */
 /* > */
 /* > \verbatim */
 /* >  TOLA    REAL */
 /* >  TOLB    REAL */
 /* >          TOLA and TOLB are the thresholds to determine the effective */
 /* >          rank of (A**T,B**T)**T. Generally, they are set to */
 /* >                   TOLA = MAX(M,N)*norm(A)*MACHEPS, */
 /* >                   TOLB = MAX(P,N)*norm(B)*MACHEPS. */
 /* >          The size of TOLA and TOLB may affect the size of backward */
 /* >          errors of the decomposition. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date August 2015 */

 /* > \ingroup realGEsing */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >     Ming Gu and Huan Ren, Computer Science Division, University of */
 /* >     California at Berkeley, USA */
 /* > */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* >  SGGSVD3 replaces the deprecated subroutine SGGSVD. */
 /* > */
 /*  ===================================================================== */
 /* SGGSVD3 driver: generalized singular value decomposition of the        */
 /* M-by-N matrix A and the P-by-N matrix B.                               */
 /*                                                                        */
 /* Stages:                                                                */
 /*   1. validate the arguments and run a workspace query on SGGSVP3;      */
 /*   2. derive the rank thresholds TOLA/TOLB from the matrix norms;       */
 /*   3. preprocess with SGGSVP3 (determines K and L);                     */
 /*   4. compute the GSVD of the resulting pair with STGSJA;               */
 /*   5. sort ALPHA(K+1 : K+min(L,M-K)) in decreasing order inside WORK    */
 /*      and record the swaps in IWORK.                                    */
 /*                                                                        */
 /* jobu/jobv/jobq  'U'/'V'/'Q' to compute the matrix, 'N' to skip it.     */
 /* m, n, p         rows of A, shared column count, rows of B.             */
 /* k, l            outputs describing the sub-block dimensions.           */
 /* a/lda, b/ldb    the two input matrices with their leading dimensions.  */
 /* alpha, beta     output generalized singular value pairs.               */
 /* u/ldu, v/ldv, q/ldq  optional orthogonal factors.                      */
 /* work, lwork     workspace; lwork == -1 requests a size query, and      */
 /*                 work[0] returns the computed workspace size.           */
 /* iwork           integer workspace, receives the sorting information.   */
 /* info            0 on success, -i for an illegal i-th argument,         */
 /*                 otherwise whatever the called routines report.         */
 /*                                                                        */
 /* Always returns 0; status is reported through *info.                    */
 /* Subroutine */ int sggsvd3_(char *jobu, char *jobv, char *jobq, integer *m, 
 	integer *n, integer *p, integer *k, integer *l, real *a, integer *lda,
 	 real *b, integer *ldb, real *alpha, real *beta, real *u, integer *
 	ldu, real *v, integer *ldv, real *q, integer *ldq, real *work, 
 	integer *lwork, integer *iwork, integer *info)
 {
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
 	    integer *);
    extern real slamch_(char *), slange_(char *, integer *, integer *,
 	     real *, integer *, real *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), stgsja_(
 	    char *, char *, char *, integer *, integer *, integer *, integer *
 	    , integer *, real *, integer *, real *, integer *, real *, real *,
 	     real *, real *, real *, integer *, real *, integer *, real *, 
 	    integer *, real *, integer *, integer *);
    extern /* Subroutine */ int sggsvp3_(char *, char *, char *, integer *, 
 	    integer *, integer *, real *, integer *, real *, integer *, real *
 	    , real *, integer *, integer *, real *, integer *, real *, 
 	    integer *, real *, integer *, integer *, real *, real *, integer *
 	    , integer *);

    real tola, tolb, ulp, unfl, anorm, bnorm, smax, cand;
    integer ncycle, ibnd, avail, isub, i, j;
    integer neg_info, prep_lwork, twice_n;
    integer lwkopt = 1;
    logical wantu, wantv, wantq, lquery;

 /*     Decode the job options */

    wantu = lsame_(jobu, "U");
    wantv = lsame_(jobv, "V");
    wantq = lsame_(jobq, "Q");
    lquery = (*lwork == -1);

 /*     Validate the arguments; the first violation determines INFO */

    *info = 0;
    if (! wantu && ! lsame_(jobu, "N")) {
 	*info = -1;
    } else if (! wantv && ! lsame_(jobv, "N")) {
 	*info = -2;
    } else if (! wantq && ! lsame_(jobq, "N")) {
 	*info = -3;
    } else if (*m < 0) {
 	*info = -4;
    } else if (*n < 0) {
 	*info = -5;
    } else if (*p < 0) {
 	*info = -6;
    } else if (*lda < f2cmax(1,*m)) {
 	*info = -10;
    } else if (*ldb < f2cmax(1,*p)) {
 	*info = -12;
    } else if (*ldu < 1 || (wantu && *ldu < *m)) {
 	*info = -16;
    } else if (*ldv < 1 || (wantv && *ldv < *p)) {
 	*info = -18;
    } else if (*ldq < 1 || (wantq && *ldq < *n)) {
 	*info = -20;
    } else if (! lquery && *lwork < 1) {
 	*info = -24;
    }

 /*     Workspace size: N entries in front of whatever SGGSVP3 asks for, */
 /*     and never less than max(1, 2*N) */

    if (*info == 0) {
 	sggsvp3_(jobu, jobv, jobq, m, p, n, a, lda, b, ldb, &tola, &tolb, 
 		k, l, u, ldu, v, ldv, q, ldq, iwork, work, work, &c_n1, 
 		info);
 	lwkopt = *n + (integer) work[0];
 	twice_n = *n << 1;
 	if (lwkopt < twice_n) {
 	    lwkopt = twice_n;
 	}
 	if (lwkopt < 1) {
 	    lwkopt = 1;
 	}
 	work[0] = (real) lwkopt;
    }

    if (*info != 0) {
 	neg_info = -(*info);
 	xerbla_("SGGSVD3", &neg_info, (ftnlen)7);
 	return 0;
    }
    if (lquery) {
 	return 0;
    }

 /*     Norms of A and B (norm selector "1", i.e. the one-norm) */

    anorm = slange_("1", m, n, a, lda, work);
    bnorm = slange_("1", p, n, b, ldb, work);

 /*     Machine parameters and the thresholds that decide the effective */
 /*     numerical rank of A and B */

    ulp = slamch_("Precision");
    unfl = slamch_("Safe Minimum");
    tola = f2cmax(*m,*n) * f2cmax(anorm,unfl) * ulp;
    tolb = f2cmax(*p,*n) * f2cmax(bnorm,unfl) * ulp;

 /*     Preprocessing: the first N entries of WORK are handed over as a */
 /*     separate array, the remainder serves as SGGSVP3's workspace */

    prep_lwork = *lwork - *n;
    sggsvp3_(jobu, jobv, jobq, m, p, n, a, lda, b, ldb, &tola, &tolb, k, l, 
 	    u, ldu, v, ldv, q, ldq, iwork, work, work + *n, &prep_lwork, 
 	    info);

 /*     GSVD of the two upper "triangular" matrices */

    stgsja_(jobu, jobv, jobq, m, p, n, k, l, a, lda, b, ldb, &tola, &tolb, 
 	    alpha, beta, u, ldu, v, ldv, q, ldq, work, &ncycle, info);

 /*     Copy ALPHA into WORK and selection-sort the entries */
 /*     K+1 .. K+min(L, M-K) there, logging each pivot (1-based) in IWORK */

    scopy_(n, alpha, &c__1, work, &c__1);
    avail = *m - *k;
    ibnd = f2cmin(*l,avail);
    for (i = 0; i < ibnd; ++i) {

 /*        Locate the largest remaining value at or after position i */

 	isub = i;
 	smax = work[*k + i];
 	for (j = i + 1; j < ibnd; ++j) {
 	    cand = work[*k + j];
 	    if (cand > smax) {
 		isub = j;
 		smax = cand;
 	    }
 	}
 	if (isub != i) {
 	    work[*k + isub] = work[*k + i];
 	    work[*k + i] = smax;
 	}
 	/* 1-based index of the element that ended up in this slot */
 	iwork[*k + i] = *k + isub + 1;
    }

    work[0] = (real) lwkopt;
    return 0;

 /*     End of SGGSVD3 */

 } /* sggsvd3_ */

--- a/lapack-netlib/SRC/sggsvp3.c
+++ b/lapack-netlib/SRC/sggsvp3.c
--- a/lapack-netlib/SRC/sgsvj0.c
+++ b/lapack-netlib/SRC/sgsvj0.c
--- a/lapack-netlib/SRC/sgsvj1.c
+++ b/lapack-netlib/SRC/sgsvj1.c
--- a/lapack-netlib/SRC/sgtcon.c
+++ b/lapack-netlib/SRC/sgtcon.c
@@ -0,0 +1,649 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 /* Expression forms: the previous brace-wrapped expansions could only appear
    as initializers, not in assignments, conditions or as statements. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 /* 1-based position of the largest element of w(s:e), relative to s.  The
    helper is chosen by element width so that float arrays are no longer
    passed to the double-precision routine. */
 #define mymaxloc(w,s,e,n) (sizeof(*(w)) == sizeof(double) ? dmaxloc_((double *)(void *)(w),*(s),*(e),n) : smaxloc_((float *)(void *)(w),*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /*
  * Raise x to the integer power n with square-and-multiply.
  * A negative n computes (1/x)**|n|; n == 0 returns 1.
  * |n| is formed in unsigned arithmetic because negating the most negative
  * integer as a signed value overflows.
  */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * Raise x to the integer power n with square-and-multiply.
  * A negative n computes (1/x)**|n|; n == 0 returns 1.
  * |n| is formed in unsigned arithmetic because negating the most negative
  * integer as a signed value overflows.
  */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * Raise the complex value x to the integer power n with square-and-multiply.
  * A negative n computes (1/x)**|n|; n == 0 returns 1.
  * |n| is formed in unsigned arithmetic because negating the most negative
  * integer as a signed value overflows.
  */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * Raise the complex value x to the integer power n with square-and-multiply.
  * A negative n computes (1/x)**|n|; n == 0 returns 1.
  * |n| is formed in unsigned arithmetic because negating the most negative
  * integer as a signed value overflows.
  */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * Integer power x**n.
  *   n == 0 or x == 1 : 1
  *   n < 0, x == -1   : -1 for odd n, 1 for even n
  *   n < 0, x == 0    : evaluates 1/0 (kept from the original; presumably
  *                      meant to raise a division fault -- confirm)
  *   n < 0, otherwise : 0 (the true result has magnitude below one)
  *   n > 0            : square-and-multiply
  * The x == -1 case uses the parity of n directly; the previous n = -n
  * overflowed for the most negative exponent.
  */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int bits;
 	if (n == 0 || x == 1) return 1;
 	if (n < 0) {
 		if (x == -1) return (n % 2 != 0) ? -1 : 1;
 		return x == 0 ? 1/x : 0;
 	}
 	bits = (unsigned long int)n;
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * Position of the largest value among w[s-1] .. w[e-1], counted from 1 at
  * index s.  The first occurrence wins on ties; the result is 1 when e <= s.
  * The argument n is not used.
  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s, k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /*
  * Position of the largest value among w[s-1] .. w[e-1], counted from 1 at
  * index s.  The first occurrence wins on ties; the result is 1 when e <= s.
  * The argument n is not used.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s, k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /*
  * Conjugated dot product of two single-precision complex vectors:
  *   *z = sum_{i=0}^{n-1} conj(x_i) * y_i
  * n_, incx_ and incy_ point to the element count and the two strides.
  * A negative stride traverses its vector backward, starting at element
  * (n-1)*|inc| as in the reference BLAS, instead of indexing in front of the
  * array.  n <= 0 stores 0.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float acc = 0.0;
 	if (n > 1) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 	}
 	pCf(z) = acc;
 }
 /*
  * Conjugated dot product of two double-precision complex vectors:
  *   *z = sum_{i=0}^{n-1} conj(x_i) * y_i
  * n_, incx_ and incy_ point to the element count and the two strides.
  * A negative stride traverses its vector backward, starting at element
  * (n-1)*|inc| as in the reference BLAS, instead of indexing in front of the
  * array.  n <= 0 stores 0.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double acc = 0.0;
 	if (n > 1) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 	}
 	pCd(z) = acc;
 }
 /*
  * Unconjugated dot product of two single-precision complex vectors:
  *   *z = sum_{i=0}^{n-1} x_i * y_i
  * n_, incx_ and incy_ point to the element count and the two strides.
  * A negative stride traverses its vector backward, starting at element
  * (n-1)*|inc| as in the reference BLAS, instead of indexing in front of the
  * array.  n <= 0 stores 0.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float acc = 0.0;
 	if (n > 1) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);
 	}
 	pCf(z) = acc;
 }
 /*
  * Unconjugated dot product of two double-precision complex vectors:
  *   *z = sum_{i=0}^{n-1} x_i * y_i
  * n_, incx_ and incy_ point to the element count and the two strides.
  * A negative stride traverses its vector backward, starting at element
  * (n-1)*|inc| as in the reference BLAS, instead of indexing in front of the
  * array.  n <= 0 stores 0.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double acc = 0.0;
 	if (n > 1) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b SGTCON */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGTCON + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgtcon.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgtcon.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgtcon.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, */
 /*                          WORK, IWORK, INFO ) */

 /*       CHARACTER          NORM */
 /*       INTEGER            INFO, N */
 /*       REAL               ANORM, RCOND */
 /*       INTEGER            IPIV( * ), IWORK( * ) */
 /*       REAL               D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGTCON estimates the reciprocal of the condition number of a real */
 /* > tridiagonal matrix A using the LU factorization as computed by */
 /* > SGTTRF. */
 /* > */
 /* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */
 /* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] NORM */
 /* > \verbatim */
 /* >          NORM is CHARACTER*1 */
 /* >          Specifies whether the 1-norm condition number or the */
 /* >          infinity-norm condition number is required: */
 /* >          = '1' or 'O':  1-norm; */
 /* >          = 'I':         Infinity-norm. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DL */
 /* > \verbatim */
 /* >          DL is REAL array, dimension (N-1) */
 /* >          The (n-1) multipliers that define the matrix L from the */
 /* >          LU factorization of A as computed by SGTTRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] D */
 /* > \verbatim */
 /* >          D is REAL array, dimension (N) */
 /* >          The n diagonal elements of the upper triangular matrix U from */
 /* >          the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU */
 /* > \verbatim */
 /* >          DU is REAL array, dimension (N-1) */
 /* >          The (n-1) elements of the first superdiagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU2 */
 /* > \verbatim */
 /* >          DU2 is REAL array, dimension (N-2) */
 /* >          The (n-2) elements of the second superdiagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices; for 1 <= i <= n, row i of the matrix was */
 /* >          interchanged with row IPIV(i).  IPIV(i) will always be either */
 /* >          i or i+1; IPIV(i) = i indicates a row interchange was not */
 /* >          required. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ANORM */
 /* > \verbatim */
 /* >          ANORM is REAL */
 /* >          If NORM = '1' or 'O', the 1-norm of the original matrix A. */
 /* >          If NORM = 'I', the infinity-norm of the original matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is REAL */
 /* >          The reciprocal of the condition number of the matrix A, */
 /* >          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */
 /* >          estimate of the 1-norm of inv(A) computed in this routine. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (2*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGTcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int sgtcon_(char *norm, integer *n, real *dl, real *d__, 
 	real *du, real *du2, integer *ipiv, real *anorm, real *rcond, real *
 	work, integer *iwork, integer *info)
 {
     /* External routines */
     extern logical lsame_(char *, char *);
     extern /* Subroutine */ int slacn2_(integer *, real *, real *, integer *, 
 	    real *, integer *, integer *), xerbla_(char *, integer *, ftnlen);
     extern /* Subroutine */ int sgttrs_(char *, integer *, integer *, real *, 
 	    real *, real *, real *, integer *, real *, integer *, integer *);

     /* Local variables */
     integer state[3];		/* scratch state handed to slacn2_ on every call */
     integer step, direct_step, row, bad_arg;
     real inv_norm;
     logical one_norm;

 /*     Validate NORM, N and ANORM, in that order; the first failure wins. */

     *info = 0;
     one_norm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
     if (! one_norm && ! lsame_(norm, "I")) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*anorm < 0.f) {
 	*info = -8;
     }
     if (*info != 0) {
 	bad_arg = -(*info);
 	xerbla_("SGTCON", &bad_arg, (ftnlen)6);
 	return 0;
     }

 /*     Quick returns: an empty matrix gives RCOND = 1, a zero norm gives 0. */

     *rcond = 0.f;
     if (*n == 0) {
 	*rcond = 1.f;
 	return 0;
     }
     if (*anorm == 0.f) {
 	return 0;
     }

 /*     A zero on the diagonal of U leaves RCOND at 0. */

     for (row = 0; row < *n; ++row) {
 	if (d__[row] == 0.f) {
 	    return 0;
 	}
     }

 /*     Drive SLACN2 until it reports completion (step == 0).  Each non-zero */
 /*     step asks for a solve with the factored matrix or with its transpose; */
 /*     direct_step is the value that selects the untransposed solve. */

     inv_norm = 0.f;
     direct_step = one_norm ? 1 : 2;
     step = 0;
     for (;;) {
 	slacn2_(n, work + *n, work, iwork, &inv_norm, &step, state);
 	if (step == 0) {
 	    break;
 	}
 	if (step == direct_step) {

 /*           Multiply by inv(U)*inv(L). */

 	    sgttrs_("No transpose", n, &c__1, dl, d__, du, du2, ipiv, work, 
 		    n, info);
 	} else {

 /*           Multiply by inv(L**T)*inv(U**T). */

 	    sgttrs_("Transpose", n, &c__1, dl, d__, du, du2, ipiv, work, n, 
 		    info);
 	}
     }

 /*     Compute the estimate of the reciprocal condition number. */

     if (inv_norm != 0.f) {
 	*rcond = 1.f / inv_norm / *anorm;
     }

     return 0;

 /*     End of SGTCON */

 } /* sgtcon_ */

--- a/lapack-netlib/SRC/sgtrfs.c
+++ b/lapack-netlib/SRC/sgtrfs.c
@@ -0,0 +1,913 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 /* Expression forms: the previous brace-wrapped expansions could only appear
    as initializers, not in assignments, conditions or as statements. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 /* 1-based position of the largest element of w(s:e), relative to s.  The
    helper is chosen by element width so that float arrays are no longer
    passed to the double-precision routine. */
 #define mymaxloc(w,s,e,n) (sizeof(*(w)) == sizeof(double) ? dmaxloc_((double *)(void *)(w),*(s),*(e),n) : smaxloc_((float *)(void *)(w),*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /*
  * Raise x to the integer power n with square-and-multiply.
  * A negative n computes (1/x)**|n|; n == 0 returns 1.
  * |n| is formed in unsigned arithmetic because negating the most negative
  * integer as a signed value overflows.
  */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * Raise x to the integer power n with square-and-multiply.
  * A negative n computes (1/x)**|n|; n == 0 returns 1.
  * |n| is formed in unsigned arithmetic because negating the most negative
  * integer as a signed value overflows.
  */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * Raise the complex value x to the integer power n with square-and-multiply.
  * A negative n computes (1/x)**|n|; n == 0 returns 1.
  * |n| is formed in unsigned arithmetic because negating the most negative
  * integer as a signed value overflows.
  */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * Raise the complex value x to the integer power n with square-and-multiply.
  * A negative n computes (1/x)**|n|; n == 0 returns 1.
  * |n| is formed in unsigned arithmetic because negating the most negative
  * integer as a signed value overflows.
  */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * Integer power x**n.
  *   n == 0 or x == 1 : 1
  *   n < 0, x == -1   : -1 for odd n, 1 for even n
  *   n < 0, x == 0    : evaluates 1/0 (kept from the original; presumably
  *                      meant to raise a division fault -- confirm)
  *   n < 0, otherwise : 0 (the true result has magnitude below one)
  *   n > 0            : square-and-multiply
  * The x == -1 case uses the parity of n directly; the previous n = -n
  * overflowed for the most negative exponent.
  */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int bits;
 	if (n == 0 || x == 1) return 1;
 	if (n < 0) {
 		if (x == -1) return (n % 2 != 0) ? -1 : 1;
 		return x == 0 ? 1/x : 0;
 	}
 	bits = (unsigned long int)n;
 	for (;;) {
 		if (bits & 01) result *= x;
 		bits >>= 1;
 		if (bits == 0) break;
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /*
  * Position of the largest value among w[s-1] .. w[e-1], counted from 1 at
  * index s.  The first occurrence wins on ties; the result is 1 when e <= s.
  * The argument n is not used.
  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s, k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /*
  * Position of the largest value among w[s-1] .. w[e-1], counted from 1 at
  * index s.  The first occurrence wins on ties; the result is 1 when e <= s.
  * The argument n is not used.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s, k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /*
  * Conjugated dot product of two single-precision complex vectors:
  *   *z = sum_{i=0}^{n-1} conj(x_i) * y_i
  * n_, incx_ and incy_ point to the element count and the two strides.
  * A negative stride traverses its vector backward, starting at element
  * (n-1)*|inc| as in the reference BLAS, instead of indexing in front of the
  * array.  n <= 0 stores 0.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float acc = 0.0;
 	if (n > 1) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 	}
 	pCf(z) = acc;
 }
 /*
  * Conjugated dot product of two double-precision complex vectors:
  *   *z = sum_{i=0}^{n-1} conj(x_i) * y_i
  * n_, incx_ and incy_ point to the element count and the two strides.
  * A negative stride traverses its vector backward, starting at element
  * (n-1)*|inc| as in the reference BLAS, instead of indexing in front of the
  * array.  n <= 0 stores 0.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double acc = 0.0;
 	if (n > 1) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 	}
 	pCd(z) = acc;
 }
 /*
  * Unconjugated dot product of two single-precision complex vectors:
  *   *z = sum_{i=0}^{n-1} x_i * y_i
  * n_, incx_ and incy_ point to the element count and the two strides.
  * A negative stride traverses its vector backward, starting at element
  * (n-1)*|inc| as in the reference BLAS, instead of indexing in front of the
  * array.  n <= 0 stores 0.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float acc = 0.0;
 	if (n > 1) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);
 	}
 	pCf(z) = acc;
 }
 /*
  * Unconjugated dot product of two double-precision complex vectors:
  *   *z = sum_{i=0}^{n-1} x_i * y_i
  * n_, incx_ and incy_ point to the element count and the two strides.
  * A negative stride traverses its vector backward, starting at element
  * (n-1)*|inc| as in the reference BLAS, instead of indexing in front of the
  * array.  n <= 0 stores 0.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double acc = 0.0;
 	if (n > 1) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;
 static real c_b18 = -1.f;
 static real c_b19 = 1.f;

 /* > \brief \b SGTRFS */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGTRFS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgtrfs.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgtrfs.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgtrfs.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, */
 /*                          IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, */
 /*                          INFO ) */

 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, LDB, LDX, N, NRHS */
 /*       INTEGER            IPIV( * ), IWORK( * ) */
 /*       REAL               B( LDB, * ), BERR( * ), D( * ), DF( * ), */
 /*      $                   DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), */
 /*      $                   FERR( * ), WORK( * ), X( LDX, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGTRFS improves the computed solution to a system of linear */
 /* > equations when the coefficient matrix is tridiagonal, and provides */
 /* > error bounds and backward error estimates for the solution. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          Specifies the form of the system of equations: */
 /* >          = 'N':  A * X = B     (No transpose) */
 /* >          = 'T':  A**T * X = B  (Transpose) */
 /* >          = 'C':  A**H * X = B  (Conjugate transpose = Transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DL */
 /* > \verbatim */
 /* >          DL is REAL array, dimension (N-1) */
 /* >          The (n-1) subdiagonal elements of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] D */
 /* > \verbatim */
 /* >          D is REAL array, dimension (N) */
 /* >          The diagonal elements of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU */
 /* > \verbatim */
 /* >          DU is REAL array, dimension (N-1) */
 /* >          The (n-1) superdiagonal elements of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DLF */
 /* > \verbatim */
 /* >          DLF is REAL array, dimension (N-1) */
 /* >          The (n-1) multipliers that define the matrix L from the */
 /* >          LU factorization of A as computed by SGTTRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DF */
 /* > \verbatim */
 /* >          DF is REAL array, dimension (N) */
 /* >          The n diagonal elements of the upper triangular matrix U from */
 /* >          the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DUF */
 /* > \verbatim */
 /* >          DUF is REAL array, dimension (N-1) */
 /* >          The (n-1) elements of the first superdiagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU2 */
 /* > \verbatim */
 /* >          DU2 is REAL array, dimension (N-2) */
 /* >          The (n-2) elements of the second superdiagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices; for 1 <= i <= n, row i of the matrix was */
 /* >          interchanged with row IPIV(i).  IPIV(i) will always be either */
 /* >          i or i+1; IPIV(i) = i indicates a row interchange was not */
 /* >          required. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,NRHS) */
 /* >          The right hand side matrix B. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] X */
 /* > \verbatim */
 /* >          X is REAL array, dimension (LDX,NRHS) */
 /* >          On entry, the solution matrix X, as computed by SGTTRS. */
 /* >          On exit, the improved solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDX */
 /* > \verbatim */
 /* >          LDX is INTEGER */
 /* >          The leading dimension of the array X.  LDX >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] FERR */
 /* > \verbatim */
 /* >          FERR is REAL array, dimension (NRHS) */
 /* >          The estimated forward error bound for each solution vector */
 /* >          X(j) (the j-th column of the solution matrix X). */
 /* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
 /* >          is an estimated upper bound for the magnitude of the largest */
 /* >          element in (X(j) - XTRUE) divided by the magnitude of the */
 /* >          largest element in X(j).  The estimate is as reliable as */
 /* >          the estimate for RCOND, and is almost always a slight */
 /* >          overestimate of the true error. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] BERR */
 /* > \verbatim */
 /* >          BERR is REAL array, dimension (NRHS) */
 /* >          The componentwise relative backward error of each solution */
 /* >          vector X(j) (i.e., the smallest relative change in */
 /* >          any element of A or B that makes X(j) an exact solution). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (3*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /* > \par Internal Parameters: */
 /*  ========================= */
 /* > */
 /* > \verbatim */
 /* >  ITMAX is the maximum number of steps of iterative refinement. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGTcomputational */

 /*  ===================================================================== */
 /*
  * sgtrfs_ -- refine a computed solution of a tridiagonal system and return
  * per-column forward (ferr) and backward (berr) error estimates.
  *
  * All arguments are passed by reference; the array pointers are shifted in
  * "Parameter adjustments" so the body can use 1-based subscripts, with
  * b(i,j) spelled b[i + j * b_dim1] (same for x).
  *
  * Flow, per right-hand side j:
  *   1. L20 loop: form the residual in work[n+1 .. 2n] (scopy_ + slagtm_),
  *      form abs(op(A))*abs(x) + abs(b) in work[1 .. n], derive berr[j],
  *      and while the stopping test holds solve for a correction with
  *      sgttrs_ and add it to column j of x with saxpy_.
  *   2. Overwrite work[1 .. n] with the weights W, then run the slacn2_
  *      loop (L70) until it reports kase == 0; ferr[j] receives the estimate.
  *   3. Divide ferr[j] by the largest |x(i,j)| when that is non-zero.
  *
  * Argument errors set *info to -1 (trans), -2 (n), -3 (nrhs), -13 (ldb) or
  * -15 (ldx), are reported through xerbla_, and return immediately.
  * The function itself always returns 0.
  *
  * NOTE(review): c__1, c_b18 and c_b19 are file-scope constants defined above
  * this function, outside the excerpt reviewed here; presumably 1, -1.f and
  * 1.f given how they are used -- confirm.
  */
 /* Subroutine */ int sgtrfs_(char *trans, integer *n, integer *nrhs, real *dl,
 	 real *d__, real *du, real *dlf, real *df, real *duf, real *du2, 
 	integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real *
 	ferr, real *berr, real *work, integer *iwork, integer *info)
 {
     /* System generated locals */
     integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2;
     real r__1, r__2, r__3, r__4;

     /* Local variables */
     integer kase;
     real safe1, safe2;
     integer i__, j;
     real s;
     extern logical lsame_(char *, char *);
     integer isave[3], count;
     extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
 	    integer *), saxpy_(integer *, real *, real *, integer *, real *, 
 	    integer *), slacn2_(integer *, real *, real *, integer *, real *, 
 	    integer *, integer *);
     extern real slamch_(char *);
     integer nz;
     real safmin;
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), slagtm_(
 	    char *, integer *, integer *, real *, real *, real *, real *, 
 	    real *, integer *, real *, real *, integer *);
     logical notran;
     char transn[1], transt[1];
     real lstres;
     extern /* Subroutine */ int sgttrs_(char *, integer *, integer *, real *, 
 	    real *, real *, real *, integer *, real *, integer *, integer *);
     real eps;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Test the input parameters. */

     /* Parameter adjustments */
     /* (shift every array base so that index 1 addresses the first element) */
     --dl;
     --d__;
     --du;
     --dlf;
     --df;
     --duf;
     --du2;
     --ipiv;
     b_dim1 = *ldb;
     b_offset = 1 + b_dim1 * 1;
     b -= b_offset;
     x_dim1 = *ldx;
     x_offset = 1 + x_dim1 * 1;
     x -= x_offset;
     --ferr;
     --berr;
     --work;
     --iwork;

     /* Function Body */
     *info = 0;
     notran = lsame_(trans, "N");
     if (! notran && ! lsame_(trans, "T") && ! lsame_(
 	    trans, "C")) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*nrhs < 0) {
 	*info = -3;
     } else if (*ldb < f2cmax(1,*n)) {
 	*info = -13;
     } else if (*ldx < f2cmax(1,*n)) {
 	*info = -15;
     }
     if (*info != 0) {
 	/* xerbla_ receives the positive position of the offending argument */
 	i__1 = -(*info);
 	xerbla_("SGTRFS", &i__1, (ftnlen)6);
 	return 0;
     }

 /*     Quick return if possible */

     if (*n == 0 || *nrhs == 0) {
 	/* Nothing to refine: report zero error for every column. */
 	i__1 = *nrhs;
 	for (j = 1; j <= i__1; ++j) {
 	    ferr[j] = 0.f;
 	    berr[j] = 0.f;
 /* L10: */
 	}
 	return 0;
     }

     /* transn / transt select the solve direction used in the two slacn2_ */
     /* branches below; they are swapped when TRANS is not 'N'. */
     if (notran) {
 	*(unsigned char *)transn = 'N';
 	*(unsigned char *)transt = 'T';
     } else {
 	*(unsigned char *)transn = 'T';
 	*(unsigned char *)transt = 'N';
     }

 /*     NZ = maximum number of nonzero elements in each row of A, plus 1 */

     nz = 4;
     eps = slamch_("Epsilon");
     safmin = slamch_("Safe minimum");
     safe1 = nz * safmin;
     safe2 = safe1 / eps;

 /*     Do for each right hand side */

     i__1 = *nrhs;
     for (j = 1; j <= i__1; ++j) {

 	count = 1;
 	lstres = 3.f;
 L20:

 /*        Loop until stopping criterion is satisfied. */

 /*        Compute residual R = B - op(A) * X, */
 /*        where op(A) = A, A**T, or A**H, depending on TRANS. */

 	/* Column j of B is copied into work[n+1 .. 2n]; slagtm_ then */
 	/* updates that copy in place using column j of X. */
 	scopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1);
 	slagtm_(trans, n, &c__1, &c_b18, &dl[1], &d__[1], &du[1], &x[j * 
 		x_dim1 + 1], ldx, &c_b19, &work[*n + 1], n);

 /*        Compute abs(op(A))*abs(x) + abs(b) for use in the backward */
 /*        error bound. */

 	/* Result goes to work[1 .. n].  First and last rows have only two */
 	/* matrix entries, hence the separate statements around the loop. */
 	if (notran) {
 	    if (*n == 1) {
 		work[1] = (r__1 = b[j * b_dim1 + 1], abs(r__1)) + (r__2 = d__[
 			1] * x[j * x_dim1 + 1], abs(r__2));
 	    } else {
 		work[1] = (r__1 = b[j * b_dim1 + 1], abs(r__1)) + (r__2 = d__[
 			1] * x[j * x_dim1 + 1], abs(r__2)) + (r__3 = du[1] * 
 			x[j * x_dim1 + 2], abs(r__3));
 		i__2 = *n - 1;
 		for (i__ = 2; i__ <= i__2; ++i__) {
 		    work[i__] = (r__1 = b[i__ + j * b_dim1], abs(r__1)) + (
 			    r__2 = dl[i__ - 1] * x[i__ - 1 + j * x_dim1], abs(
 			    r__2)) + (r__3 = d__[i__] * x[i__ + j * x_dim1], 
 			    abs(r__3)) + (r__4 = du[i__] * x[i__ + 1 + j * 
 			    x_dim1], abs(r__4));
 /* L30: */
 		}
 		work[*n] = (r__1 = b[*n + j * b_dim1], abs(r__1)) + (r__2 = 
 			dl[*n - 1] * x[*n - 1 + j * x_dim1], abs(r__2)) + (
 			r__3 = d__[*n] * x[*n + j * x_dim1], abs(r__3));
 	    }
 	} else {
 	    /* Transposed system: same sums with dl and du exchanged. */
 	    if (*n == 1) {
 		work[1] = (r__1 = b[j * b_dim1 + 1], abs(r__1)) + (r__2 = d__[
 			1] * x[j * x_dim1 + 1], abs(r__2));
 	    } else {
 		work[1] = (r__1 = b[j * b_dim1 + 1], abs(r__1)) + (r__2 = d__[
 			1] * x[j * x_dim1 + 1], abs(r__2)) + (r__3 = dl[1] * 
 			x[j * x_dim1 + 2], abs(r__3));
 		i__2 = *n - 1;
 		for (i__ = 2; i__ <= i__2; ++i__) {
 		    work[i__] = (r__1 = b[i__ + j * b_dim1], abs(r__1)) + (
 			    r__2 = du[i__ - 1] * x[i__ - 1 + j * x_dim1], abs(
 			    r__2)) + (r__3 = d__[i__] * x[i__ + j * x_dim1], 
 			    abs(r__3)) + (r__4 = dl[i__] * x[i__ + 1 + j * 
 			    x_dim1], abs(r__4));
 /* L40: */
 		}
 		work[*n] = (r__1 = b[*n + j * b_dim1], abs(r__1)) + (r__2 = 
 			du[*n - 1] * x[*n - 1 + j * x_dim1], abs(r__2)) + (
 			r__3 = d__[*n] * x[*n + j * x_dim1], abs(r__3));
 	    }
 	}

 /*        Compute componentwise relative backward error from formula */

 /*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */

 /*        where abs(Z) is the componentwise absolute value of the matrix */
 /*        or vector Z.  If the i-th component of the denominator is less */
 /*        than SAFE2, then SAFE1 is added to the i-th components of the */
 /*        numerator and denominator before dividing. */

 	s = 0.f;
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    if (work[i__] > safe2) {
 /* Computing MAX */
 		r__2 = s, r__3 = (r__1 = work[*n + i__], abs(r__1)) / work[
 			i__];
 		s = f2cmax(r__2,r__3);
 	    } else {
 /* Computing MAX */
 		r__2 = s, r__3 = ((r__1 = work[*n + i__], abs(r__1)) + safe1) 
 			/ (work[i__] + safe1);
 		s = f2cmax(r__2,r__3);
 	    }
 /* L50: */
 	}
 	berr[j] = s;

 /*        Test stopping criterion. Continue iterating if */
 /*           1) The residual BERR(J) is larger than machine epsilon, and */
 /*           2) BERR(J) decreased by at least a factor of 2 during the */
 /*              last iteration, and */
 /*           3) At most ITMAX iterations tried. */

 	/* The literal 5 below is the inlined ITMAX of criterion 3. */
 	if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) {

 /*           Update solution and try again. */

 	    /* sgttrs_ overwrites the residual in work[n+1 .. 2n] with the */
 	    /* correction, which saxpy_ then adds to column j of X. */
 	    sgttrs_(trans, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[
 		    1], &work[*n + 1], n, info);
 	    saxpy_(n, &c_b19, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1)
 		    ;
 	    lstres = berr[j];
 	    ++count;
 	    goto L20;
 	}

 /*        Bound error from formula */

 /*        norm(X - XTRUE) / norm(X) .le. FERR = */
 /*        norm( abs(inv(op(A)))* */
 /*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */

 /*        where */
 /*          norm(Z) is the magnitude of the largest component of Z */
 /*          inv(op(A)) is the inverse of op(A) */
 /*          abs(Z) is the componentwise absolute value of the matrix or */
 /*             vector Z */
 /*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
 /*          EPS is machine epsilon */

 /*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */
 /*        is incremented by SAFE1 if the i-th component of */
 /*        abs(op(A))*abs(X) + abs(B) is less than SAFE2. */

 /*        Use SLACN2 to estimate the infinity-norm of the matrix */
 /*           inv(op(A)) * diag(W), */
 /*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ) */

 	/* work[1 .. n] is overwritten in place with the weights W. */
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 	    if (work[i__] > safe2) {
 		work[i__] = (r__1 = work[*n + i__], abs(r__1)) + nz * eps * 
 			work[i__];
 	    } else {
 		work[i__] = (r__1 = work[*n + i__], abs(r__1)) + nz * eps * 
 			work[i__] + safe1;
 	    }
 /* L60: */
 	}

 	/* slacn2_ is called repeatedly; after each call kase says which */
 	/* product to apply to work[n+1 .. 2n], and kase == 0 ends the loop. */
 	kase = 0;
 L70:
 	slacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], &
 		kase, isave);
 	if (kase != 0) {
 	    if (kase == 1) {

 /*              Multiply by diag(W)*inv(op(A)**T). */

 		sgttrs_(transt, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], &
 			ipiv[1], &work[*n + 1], n, info);
 		i__2 = *n;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    work[*n + i__] = work[i__] * work[*n + i__];
 /* L80: */
 		}
 	    } else {

 /*              Multiply by inv(op(A))*diag(W). */

 		i__2 = *n;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    work[*n + i__] = work[i__] * work[*n + i__];
 /* L90: */
 		}
 		sgttrs_(transn, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], &
 			ipiv[1], &work[*n + 1], n, info);
 	    }
 	    goto L70;
 	}

 /*        Normalize error. */

 	/* lstres is reused here as the largest |x(i,j)| of the column. */
 	lstres = 0.f;
 	i__2 = *n;
 	for (i__ = 1; i__ <= i__2; ++i__) {
 /* Computing MAX */
 	    r__2 = lstres, r__3 = (r__1 = x[i__ + j * x_dim1], abs(r__1));
 	    lstres = f2cmax(r__2,r__3);
 /* L100: */
 	}
 	if (lstres != 0.f) {
 	    ferr[j] /= lstres;
 	}

 /* L110: */
     }

     return 0;

 /*     End of SGTRFS */

 } /* sgtrfs_ */

--- a/lapack-netlib/SRC/sgtsv.c
+++ b/lapack-netlib/SRC/sgtsv.c
@@ -0,0 +1,745 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 /* The names `complex` and `I` are needed as ordinary identifiers below
    (`complex` becomes a typedef name), so any macro definitions of them
    left behind by the headers included above are removed first. */
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar type aliases used throughout the translated Fortran code. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;

 /* Complex numbers as (real part, imaginary part) pairs. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;

 /* Cf: value of *z as a native single-precision C complex number. */
 static inline _Complex float Cf(complex *z) {
 	const real re = z->r;
 	const real im = z->i;
 	return re + im*_Complex_I;
 }

 /* Cd: value of *z as a native double-precision C complex number. */
 static inline _Complex double Cd(doublecomplex *z) {
 	const doublereal re = z->r;
 	const doublereal im = z->i;
 	return re + im*_Complex_I;
 }

 /* _pCf: the address of *z reinterpreted as a pointer to _Complex float. */
 static inline _Complex float * _pCf(complex *z) {
 	_Complex float *native = (_Complex float*)z;
 	return native;
 }

 /* _pCd: the address of *z reinterpreted as a pointer to _Complex double. */
 static inline _Complex double * _pCd(doublecomplex *z) {
 	_Complex double *native = (_Complex double*)z;
 	return native;
 }

 /* Lvalue access to an f2c complex through its native C complex view. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))

 /* Logical and one-byte aliases. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer aliases used by the I/O control structs declared below.
    ftnlen is also the type of the trailing length argument in the
    xerbla_ declaration inside sgtsv_ later in this file. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* cilist: control list for external read/write statements.
    Declared by the embedded f2c prelude; sgtsv_ in this file never uses it.
    NOTE(review): field roles are not visible here; judging by the names they
    are error flag, unit, end flag, format string and record number -- confirm
    against the libf2c documentation. */
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /* icilist: control list for internal read/write statements.
    Declared by the embedded f2c prelude; sgtsv_ in this file never uses it.
    NOTE(review): unlike cilist the unit is a char pointer (iciunit) and two
    extra counters (icirlen, icirnum) are carried; their exact meaning is not
    visible here -- confirm against the libf2c documentation. */
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /* olist: parameter list for the OPEN statement.
    Declared by the embedded f2c prelude; sgtsv_ in this file never uses it.
    NOTE(review): the char* members presumably carry the OPEN specifiers
    (file name, status, access, form, blank) and ofnmlen the file-name
    length -- confirm against the libf2c documentation. */
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /* cllist: parameter list for the CLOSE statement (error flag, unit and a
    status string, judging by the member names -- TODO confirm).
    Declared by the embedded f2c prelude; sgtsv_ in this file never uses it. */
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /* alist: parameter list shared by REWIND, BACKSPACE and ENDFILE
    (an error flag and a unit, judging by the member names -- TODO confirm).
    Declared by the embedded f2c prelude; sgtsv_ in this file never uses it. */
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inlist: parameter list for the INQUIRE statement.
    After the error flag, unit and file name, the members come in the order
    noted on inex ("parameters in standard's order").  Most string results
    are a char* paired with a length member (ftnlen, except informlen which
    is declared ftnint).
    Declared by the embedded f2c prelude; sgtsv_ in this file never uses it.
    NOTE(review): which INQUIRE specifier each member maps to is inferred
    from its name only -- confirm against the libf2c documentation. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* Multitype: one storage cell that can hold any of the scalar types
    declared above (one member per type; the longint member is commented
    out).  Per the original note it exists for routines with multiple entry
    points; sgtsv_ in this file never uses it. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Vardesc: descriptor of one variable, referenced from struct Namelist
    below.  sgtsv_ in this file never uses it.
    NOTE(review): presumably name/addr locate the variable, dims describes
    its shape and type is a type code -- confirm against libf2c. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* Namelist: a named group of variables -- a name, an array of Vardesc
    pointers and a count (nvars).  sgtsv_ in this file never uses it.
    NOTE(review): nvars presumably gives the length of vars -- confirm. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /*
  * Macro stand-ins for the f2c runtime helpers, so the translated routine
  * needs no separate f2c library.
  *
  * Caveats that apply to the whole table:
  *  - arguments may be evaluated more than once; never pass expressions with
  *    side effects;
  *  - the brace-wrapped macros ({ ... }) are statements, not expressions;
  *  - `abs` deliberately shadows the integer-only abs() from <stdlib.h> with
  *    a type-generic version.
  *
  * Fixes relative to the generated prelude:
  *  - pow_zz passed the struct *(B) to cpow(); it now converts with Cd(B)
  *    like every other complex helper here.
  *  - myceiling, myhuge and mymaxloc expanded to `{expr}`, which is not an
  *    expression; they are now parenthesised expressions.
  *  - two lines of commented-out macro definitions were dropped.
  */

 /* Elementary arithmetic helpers. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Intrinsics taking pointers to their operands (Fortran passes by
    reference); complex results are stored through the first argument. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))

 /* Powers: integer exponents go to the static *pow_ui helpers below. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}

 /* Character helpers.
    s_cat: concatenate *(np) pieces rpp[i] (lengths rnp[i]) into lpp, which
    has room for llp characters, then blank-fill the remainder.
    s_cmp: strncmp over the shorter of the two lengths.
    s_copy: copy up to min(C,D) characters, stopping early at a NUL in B. */
 #define s_cat(lpp, rpp, rnp, np, llp) { \
 	ftnlen i, nc, ll; char *f__rp, *lp; \
 	ll = (llp); lp = (lpp); \
 	for(i=0; i < (int)*(np); ++i) { \
 		nc = ll; \
 		if((rnp)[i] < nc) nc = (rnp)[i]; \
 		ll -= nc; \
 		f__rp = (rpp)[i]; \
 		while(--nc >= 0) *lp++ = *(f__rp)++; \
 	} \
 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }

 /* Termination helpers: both ignore their arguments and exit the process. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}

 /* Loop-control and intrinsic shims. */
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 #define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning `logical`.  The parameter list is
    left open -- `(...)` when compiled as C++, an unprototyped `()` in C --
    so one type covers callbacks of any arity.  sgtsv_ in this file does not
    use it. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /*
  * spow_ui: x raised to the integer power n in single precision, by binary
  * (square-and-multiply) exponentiation.
  *
  *   n == 0  -> 1 for every x
  *   n <  0  -> (1/x) raised to |n|
  *
  * |n| is formed in unsigned arithmetic.  The generated code negated the
  * signed value (`n = -n`), which overflows -- undefined behaviour -- when
  * n is the most negative integer.
  */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int u = (unsigned long int)n;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		u = 0UL - u;	/* |n| without signed negation */
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		u >>= 1;
 		if (u == 0) break;
 		x *= x;
 	}
 	return result;
 }
 /*
  * dpow_ui: x raised to the integer power n in double precision, by binary
  * (square-and-multiply) exponentiation.
  *
  *   n == 0  -> 1 for every x
  *   n <  0  -> (1/x) raised to |n|
  *
  * |n| is formed in unsigned arithmetic.  The generated code negated the
  * signed value (`n = -n`), which overflows -- undefined behaviour -- when
  * n is the most negative integer.
  */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int u = (unsigned long int)n;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		u = 0UL - u;	/* |n| without signed negation */
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		u >>= 1;
 		if (u == 0) break;
 		x *= x;
 	}
 	return result;
 }
 /*
  * cpow_ui: single-precision complex x raised to the integer power n, by
  * binary (square-and-multiply) exponentiation.
  *
  *   n == 0  -> 1 for every x
  *   n <  0  -> (1/x) raised to |n|
  *
  * |n| is formed in unsigned arithmetic.  The generated code negated the
  * signed value (`n = -n`), which overflows -- undefined behaviour -- when
  * n is the most negative integer.
  */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int u = (unsigned long int)n;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		u = 0UL - u;	/* |n| without signed negation */
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		u >>= 1;
 		if (u == 0) break;
 		x *= x;
 	}
 	return result;
 }
 /*
  * zpow_ui: double-precision complex x raised to the integer power n, by
  * binary (square-and-multiply) exponentiation.
  *
  *   n == 0  -> 1 for every x
  *   n <  0  -> (1/x) raised to |n|
  *
  * |n| is formed in unsigned arithmetic.  The generated code negated the
  * signed value (`n = -n`), which overflows -- undefined behaviour -- when
  * n is the most negative integer.
  */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int u = (unsigned long int)n;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		u = 0UL - u;	/* |n| without signed negation */
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		u >>= 1;
 		if (u == 0) break;
 		x *= x;
 	}
 	return result;
 }
 /*
  * pow_ii: integer x raised to the integer power n, with truncating integer
  * semantics for negative exponents.
  *
  *   n == 0 or x == 1      -> 1
  *   n <  0 and x == -1    -> (-1) raised to |n|
  *   n <  0 and x == 0     -> evaluates 1/x, i.e. an integer division by
  *                            zero, exactly as the generated code did
  *   n <  0, any other x   -> 0 (the true result has magnitude below 1)
  *   n >  0                -> binary (square-and-multiply) exponentiation
  *
  * |n| is formed in unsigned arithmetic.  The generated code negated the
  * signed value (`n = -n`), which overflows -- undefined behaviour -- when
  * n is the most negative integer.  Overflow of the result itself is not
  * detected, as before.
  */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int u = (unsigned long int)n;

 	if (n == 0 || x == 1)
 		return 1;
 	if (n < 0) {
 		if (x != -1)
 			return x == 0 ? 1/x : 0;
 		u = 0UL - u;	/* |n| without signed negation */
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		u >>= 1;
 		if (u == 0) break;
 		x *= x;
 	}
 	return result;
 }
 /*
  * dmaxloc_: position of the largest value among w[s-1] .. w[e-1]
  * (1-based bounds s..e, both inclusive), returned relative to s so that the
  * first element of the range is position 1.  The earliest maximum wins on
  * ties.  w[s-1] is read unconditionally, so the range must not be empty.
  * The parameter n is accepted but not used.
  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double top = w[s-1];
 	integer k;

 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /*
  * smaxloc_: position of the largest value among w[s-1] .. w[e-1]
  * (1-based bounds s..e, both inclusive), returned relative to s so that the
  * first element of the range is position 1.  The earliest maximum wins on
  * ties.  w[s-1] is read unconditionally, so the range must not be empty.
  * The parameter n is accepted but not used.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float top = w[s-1];
 	integer k;

 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /*
  * cdotc_: conjugated dot product, *z = sum over i of conj(x_i) * y_i, for
  * single-precision complex vectors of *n_ elements with strides *incx_ and
  * *incy_.  The result is returned through z; *n_ <= 0 yields 0.
  *
  * Fix: a negative increment now follows the BLAS convention of walking the
  * vector backwards from offset (1 - n) * inc.  The generated helper indexed
  * x[i*incx] directly, which addresses memory before the array start for any
  * negative increment.  Non-negative increments behave exactly as before.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	integer i, ix, iy;
 	_Complex float acc = 0.0;

 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = acc;
 }
 /*
  * zdotc_: conjugated dot product, *z = sum over i of conj(x_i) * y_i, for
  * double-precision complex vectors of *n_ elements with strides *incx_ and
  * *incy_.  The result is returned through z; *n_ <= 0 yields 0.
  *
  * Fix: a negative increment now follows the BLAS convention of walking the
  * vector backwards from offset (1 - n) * inc.  The generated helper indexed
  * x[i*incx] directly, which addresses memory before the array start for any
  * negative increment.  Non-negative increments behave exactly as before.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	integer i, ix, iy;
 	_Complex double acc = 0.0;

 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = acc;
 }	
 /*
  * cdotu_: unconjugated dot product, *z = sum over i of x_i * y_i, for
  * single-precision complex vectors of *n_ elements with strides *incx_ and
  * *incy_.  The result is returned through z; *n_ <= 0 yields 0.
  *
  * Fix: a negative increment now follows the BLAS convention of walking the
  * vector backwards from offset (1 - n) * inc.  The generated helper indexed
  * x[i*incx] directly, which addresses memory before the array start for any
  * negative increment.  Non-negative increments behave exactly as before.
  * The old inline comments mentioned dconjg although nothing is conjugated
  * here; they were copied from the conjugated variant and are gone.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	integer i, ix, iy;
 	_Complex float acc = 0.0;

 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			acc += Cf(&x[ix]) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = acc;
 }
 /*
  * zdotu_: unconjugated dot product, *z = sum over i of x_i * y_i, for
  * double-precision complex vectors of *n_ elements with strides *incx_ and
  * *incy_.  The result is returned through z; *n_ <= 0 yields 0.
  *
  * Fix: a negative increment now follows the BLAS convention of walking the
  * vector backwards from offset (1 - n) * inc.  The generated helper indexed
  * x[i*incx] directly, which addresses memory before the array start for any
  * negative increment.  Non-negative increments behave exactly as before.
  * The old inline comments mentioned dconjg although nothing is conjugated
  * here; they were copied from the conjugated variant and are gone.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	integer i, ix, iy;
 	_Complex double acc = 0.0;

 	if (n > 0) {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			acc += Cd(&x[ix]) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief <b> SGTSV computes the solution to a system of linear equations A * X = B for GT matrices </b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGTSV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgtsv.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgtsv.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgtsv.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) */

 /*       INTEGER            INFO, LDB, N, NRHS */
 /*       REAL               B( LDB, * ), D( * ), DL( * ), DU( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGTSV  solves the equation */
 /* > */
 /* >    A*X = B, */
 /* > */
 /* > where A is an n by n tridiagonal matrix, by Gaussian elimination with */
 /* > partial pivoting. */
 /* > */
 /* > Note that the equation  A**T*X = B  may be solved by interchanging the */
 /* > order of the arguments DU and DL. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DL */
 /* > \verbatim */
 /* >          DL is REAL array, dimension (N-1) */
 /* >          On entry, DL must contain the (n-1) sub-diagonal elements of */
 /* >          A. */
 /* > */
 /* >          On exit, DL is overwritten by the (n-2) elements of the */
 /* >          second super-diagonal of the upper triangular matrix U from */
 /* >          the LU factorization of A, in DL(1), ..., DL(n-2). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] D */
 /* > \verbatim */
 /* >          D is REAL array, dimension (N) */
 /* >          On entry, D must contain the diagonal elements of A. */
 /* > */
 /* >          On exit, D is overwritten by the n diagonal elements of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DU */
 /* > \verbatim */
 /* >          DU is REAL array, dimension (N-1) */
 /* >          On entry, DU must contain the (n-1) super-diagonal elements */
 /* >          of A. */
 /* > */
 /* >          On exit, DU is overwritten by the (n-1) elements of the first */
 /* >          super-diagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,NRHS) */
 /* >          On entry, the N by NRHS right hand side matrix B. */
 /* >          On exit, if INFO = 0, the N by NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0: successful exit */
 /* >          < 0: if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0: if INFO = i, U(i,i) is exactly zero, and the solution */
 /* >               has not been computed.  The factorization has not been */
 /* >               completed unless i = N. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGTsolve */

 /*  ===================================================================== */
 /*
  * sgtsv_ -- solve A*X = B for an n-by-n tridiagonal matrix A by Gaussian
  * elimination with partial pivoting (only adjacent rows are interchanged).
  *
  * Fortran calling convention: every argument is passed by reference and the
  * arrays are addressed 1-based, column-major, after the pointer shifts below.
  *
  *   n     order of A; must be >= 0, otherwise *info = -1.
  *   nrhs  number of columns of B; must be >= 0, otherwise *info = -2.
  *   dl    in:  the n-1 sub-diagonal entries of A.
  *         out: dl[1 .. n-2] hold the second super-diagonal of U.
  *   d__   in:  the n diagonal entries.        out: the diagonal of U.
  *   du    in:  the n-1 super-diagonal entries. out: first super-diagonal of U.
  *   b     in:  right-hand sides, ldb-by-nrhs.  out: the solution X when
  *         *info == 0.
  *   ldb   leading dimension of b; must be >= max(1,n), otherwise *info = -7.
  *   info  0 on success; -i when argument i is illegal (also reported through
  *         xerbla_); i > 0 when U(i,i) is exactly zero, in which case the
  *         routine returns before back substitution.
  *
  * The function itself always returns 0.
  *
  * Differences from the generated code:
  *  - Back substitution visits exactly nrhs columns.  The former
  *    "nrhs <= 2" path always processed column 1 first, so a call with
  *    nrhs == 0 and n > 0 read and wrote b(n,1), an element outside the
  *    ldb-by-0 array.  The factorisation of dl/d/du and the singularity
  *    report for nrhs == 0 are unchanged.
  *  - The duplicated nrhs == 1 / general elimination paths and the separate
  *    final elimination step are merged into one loop.  For nrhs >= 1 the
  *    same floating-point operations run in the same order.
  */
 /* Subroutine */ int sgtsv_(integer *n, integer *nrhs, real *dl, real *d__, 
 	real *du, real *b, integer *ldb, integer *info)
 {
     integer b_dim1, b_offset, i__1;
     integer i__, j;
     real fact, temp;
     int fill_in;
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);

     /* Parameter adjustments: shift the bases so that index 1 addresses the
        first element and b(i,j) is b[i + j * b_dim1]. */
     --dl;
     --d__;
     --du;
     b_dim1 = *ldb;
     b_offset = 1 + b_dim1 * 1;
     b -= b_offset;

     /* Argument checks */
     *info = 0;
     if (*n < 0) {
 	*info = -1;
     } else if (*nrhs < 0) {
 	*info = -2;
     } else if (*ldb < f2cmax(1,*n)) {
 	*info = -7;
     }
     if (*info != 0) {
 	i__1 = -(*info);
 	xerbla_("SGTSV ", &i__1, (ftnlen)5);
 	return 0;
     }

     if (*n == 0) {
 	return 0;
     }

     /* Forward elimination, one sub-diagonal entry per step. */
     for (i__ = 1; i__ <= *n - 1; ++i__) {

 	/* Steps 1 .. n-2 also touch the second super-diagonal (kept in dl);
 	   the final step has no such entry. */
 	fill_in = i__ < *n - 1;

 	if (abs(d__[i__]) >= abs(dl[i__])) {

 	    /* No row interchange required */

 	    if (d__[i__] == 0.f) {
 		/* Pivot and sub-diagonal are both zero: U(i,i) == 0. */
 		*info = i__;
 		return 0;
 	    }
 	    fact = dl[i__] / d__[i__];
 	    d__[i__ + 1] -= fact * du[i__];
 	    for (j = 1; j <= *nrhs; ++j) {
 		b[i__ + 1 + j * b_dim1] -= fact * b[i__ + j * b_dim1];
 	    }
 	    if (fill_in) {
 		dl[i__] = 0.f;
 	    }
 	} else {

 	    /* Interchange rows I and I+1 */

 	    fact = d__[i__] / dl[i__];
 	    d__[i__] = dl[i__];
 	    temp = d__[i__ + 1];
 	    d__[i__ + 1] = du[i__] - fact * temp;
 	    if (fill_in) {
 		dl[i__] = du[i__ + 1];
 		du[i__ + 1] = -fact * dl[i__];
 	    }
 	    du[i__] = temp;
 	    for (j = 1; j <= *nrhs; ++j) {
 		temp = b[i__ + j * b_dim1];
 		b[i__ + j * b_dim1] = b[i__ + 1 + j * b_dim1];
 		b[i__ + 1 + j * b_dim1] = temp - fact * b[i__ + 1 + j * 
 			b_dim1];
 	    }
 	}
     }
     if (d__[*n] == 0.f) {
 	*info = *n;
 	return 0;
     }

     /* Back solve with the matrix U from the factorization, column by
        column.  Nothing is touched when nrhs == 0. */
     for (j = 1; j <= *nrhs; ++j) {
 	b[*n + j * b_dim1] /= d__[*n];
 	if (*n > 1) {
 	    b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n - 1] * b[
 		    *n + j * b_dim1]) / d__[*n - 1];
 	}
 	for (i__ = *n - 2; i__ >= 1; --i__) {
 	    b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[i__ + 1 
 		    + j * b_dim1] - dl[i__] * b[i__ + 2 + j * b_dim1]) / d__[
 		    i__];
 	}
     }

     return 0;

 /*     End of SGTSV */

 } /* sgtsv_ */

--- a/lapack-netlib/SRC/sgtsvx.c
+++ b/lapack-netlib/SRC/sgtsvx.c
@@ -0,0 +1,828 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* C spellings of the Fortran scalar types used by the f2c-translated code. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers are stored as a {real part, imaginary part} pair. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd: build a native C complex value from the pair pointed to by z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd: reinterpret a pair pointer as a pointer to the native complex
    type (relies on both having the same layout). */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf/pCd: the pair viewed as a native complex lvalue, usable as an
    assignment target. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Fortran LOGICAL and the narrow integer/logical variants. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Values used for Fortran .TRUE. and .FALSE. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 /* Defaults to plain `extern` unless the includer defined it beforehand. */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */
 /* Integer aliases used by the I/O descriptor structs below; ftnlen is also
    the type of the trailing length argument in prototypes such as xerbla_. */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* Descriptor for external READ/WRITE statements.
    NOTE(review): field roles are not exercised in the visible code; the
    names suggest error flag, unit, end flag, format and record number --
    confirm against the libf2c documentation. */
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /* Descriptor for internal READ/WRITE statements; unlike cilist, the unit
    field is a character pointer.
    NOTE(review): the remaining field roles are inferred from their names
    only -- confirm against the libf2c documentation. */
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /* Descriptor for the OPEN statement.
    NOTE(review): the members presumably mirror the OPEN specifiers (file
    name and its length, status, access, form, record length, blank) --
    confirm against the libf2c documentation. */
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /* Descriptor for the CLOSE statement.
    NOTE(review): presumably error flag, unit and status string -- confirm. */
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /* Descriptor shared by the REWIND, BACKSPACE and ENDFILE statements.
    NOTE(review): presumably error flag and unit number -- confirm. */
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* Descriptor for the INQUIRE statement.  Most string members are paired
    with a length member.
    NOTE(review): `informlen` is declared ftnint while the other length
    members are ftnlen; both are typedefs of int here, so this is harmless. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 /* Upper-case alias for void. */
 #define VOID void

 /* Union with one member per Fortran scalar type, so a single object can
    hold a value of any of them. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Describes one variable of a NAMELIST group.
    NOTE(review): the members presumably hold the variable's name, address,
    dimension information and a type code -- confirm against libf2c. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A NAMELIST group: a name plus an array of nvars Vardesc pointers
    (presumably one per member variable -- confirm). */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Generic arithmetic helpers.  They are function-like macros, so an
    argument can be evaluated more than once: do not pass expressions with
    side effects.  Note that `abs` is defined here as a type-generic macro. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit helpers: test, clear or set bit number b of a. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro stand-ins for the libf2c runtime routines.  Scalar arguments are
    pointers and are dereferenced by the macro; complex arguments are
    pointers to the complex/doublecomplex structs and are converted with
    Cf/Cd (read) or pCf/pCd (write).  Macros whose expansion is wrapped in
    braces are statements, not expressions. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 /* NOTE(review): c_cos calls the double-precision ccos, whereas c_exp,
    c_log and c_sin use the float variants -- confirm this is intended. */
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 /* NOTE(review): r_cnjg and r_imag below apply the double-precision
    conj/cimag to a float complex value. */
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* Positive difference: a - b when a > b, otherwise 0. */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* Truncation toward zero. */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* Base-10 logarithm, computed as log(x) times the constant log10(e). */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* Round to nearest, halves away from zero; u_nint takes a value, the
    d_/i_ wrappers take pointers. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* |a| carrying the sign of b (b >= 0 counts as positive). */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Power helpers; the *_ui and cpow_ui/zpow_ui functions they call are
    expected to be defined later in the including file. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): the exponent is passed as *(B) rather than Cd(B); this only
    compiles if B points to a native arithmetic type -- confirm callers. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* Concatenate *(np) strings rpp[i] (lengths rnp[i]) into lpp, truncating at
    llp characters and filling any remainder with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* Compares only the first min(c,d) characters via strncmp; no blank
    padding of the shorter operand is performed. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* Copies at most min(C,D) characters and stops early at a NUL in B; the
    destination is neither blank-padded nor NUL-terminated. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* Both ignore their arguments: sig_die exits with status 1, s_stop with 0. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* libf2c version banner string. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control helpers: expand to a complete `break;` / `continue;`. */
 #define myexit_() break;
 #define mycycle() continue;
 /* Value-producing helpers.  They expand to parenthesised expressions so
    they can be used anywhere a value is expected (assignments, arguments,
    comparisons); the former brace-wrapped expansions were only legal as a
    scalar initializer.  The argument of myhuge is ignored. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 /* 1-based position of the maximum of w[s..e] relative to s; s and e are
    passed by pointer and forwarded by value to dmaxloc_. */
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning `logical`.  The parameter list is
    left unspecified: `(...)` when compiled as C++, `()` in C. */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Single-precision x**n for an integer exponent, by binary exponentiation
    (square-and-multiply).  n == 0 gives 1; for n < 0 the base is inverted
    and |n| is used. */
 static float spow_ui(float x, integer n) {
 	float result=1.0; unsigned long int u;
 	if(n != 0) {
 		/* Form |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value overflows (undefined behaviour). */
 		u = (unsigned long int)n;
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* this bit of the exponent is set */
 			if(u >>= 1) x *= x;	/* more bits remain: square the base */
 			else break;
 		}
 	}
 	return result;
 }
 /* Double-precision x**n for an integer exponent, by binary exponentiation
    (square-and-multiply).  n == 0 gives 1; for n < 0 the base is inverted
    and |n| is used. */
 static double dpow_ui(double x, integer n) {
 	double result=1.0; unsigned long int u;
 	if(n != 0) {
 		/* Form |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value overflows (undefined behaviour). */
 		u = (unsigned long int)n;
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* this bit of the exponent is set */
 			if(u >>= 1) x *= x;	/* more bits remain: square the base */
 			else break;
 		}
 	}
 	return result;
 }
 /* Single-precision complex x**n for an integer exponent, by binary
    exponentiation.  n == 0 gives 1; for n < 0 the base is inverted and |n|
    is used. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result=1.0; unsigned long int u;
 	if(n != 0) {
 		/* Form |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value overflows (undefined behaviour). */
 		u = (unsigned long int)n;
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* this bit of the exponent is set */
 			if(u >>= 1) x *= x;	/* more bits remain: square the base */
 			else break;
 		}
 	}
 	return result;
 }
 /* Double-precision complex x**n for an integer exponent, by binary
    exponentiation.  n == 0 gives 1; for n < 0 the base is inverted and |n|
    is used. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result=1.0; unsigned long int u;
 	if(n != 0) {
 		/* Form |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value overflows (undefined behaviour). */
 		u = (unsigned long int)n;
 		if(n < 0) u = 0UL - u, x = 1/x;
 		for( ; ; ) {
 			if(u & 01) result *= x;	/* this bit of the exponent is set */
 			if(u >>= 1) x *= x;	/* more bits remain: square the base */
 			else break;
 		}
 	}
 	return result;
 }
 /* Integer x**n with Fortran integer-division semantics for n < 0:
      n == 0 or x == 1      -> 1
      n < 0 and |x| > 1     -> 0   (1/x**|n| truncates to zero)
      n < 0 and x == 0      -> evaluates 1/x, i.e. an integer division by
                               zero, exactly as the original code did
      n < 0 and x == -1     -> (-1)**|n|
      n > 0                 -> binary exponentiation */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 	}
 	/* Form |n| in unsigned arithmetic: negating the most negative integer
 	   as a signed value overflows (undefined behaviour). */
 	u = (unsigned long int)n;
 	if (n < 0) u = 0UL - u;
 	for( ; ; ) {
 		if(u & 01) result *= x;	/* this bit of the exponent is set */
 		if(u >>= 1) x *= x;	/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Position of the first largest entry among the 1-based elements
    w[s..e], counted from 1 at index s.  The element at s is always read;
    the n argument is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double peak = w[s-1];
 	integer k;
 	for (k = s + 1; k <= e; ++k) {
 		/* strict comparison keeps the earliest of equal maxima */
 		if (w[k-1] > peak) {
 			best = k;
 			peak = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the first largest entry among the 1-based elements
    w[s..e], counted from 1 at index s.  The element at s is always read;
    the n argument is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float peak = w[s-1];
 	integer k;
 	for (k = s + 1; k <= e; ++k) {
 		/* strict comparison keeps the earliest of equal maxima */
 		if (w[k-1] > peak) {
 			best = k;
 			peak = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum over i of conj(x(i)) * y(i), for n
    elements of x and y taken with strides *incx_ and *incy_.
    n <= 0 yields 0.  A negative stride traverses the vector backwards,
    starting from element (1-n)*inc as in the reference BLAS. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex float acc = 0.0;
 	if (n > 0) {
 		/* with a negative stride the first logical element is the last stored one */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product: *z = sum over i of conj(x(i)) * y(i), for n
    elements of x and y taken with strides *incx_ and *incy_.
    n <= 0 yields 0.  A negative stride traverses the vector backwards,
    starting from element (1-n)*inc as in the reference BLAS. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex double acc = 0.0;
 	if (n > 0) {
 		/* with a negative stride the first logical element is the last stored one */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over i of x(i) * y(i), for n
    elements of x and y taken with strides *incx_ and *incy_.
    n <= 0 yields 0.  A negative stride traverses the vector backwards,
    starting from element (1-n)*inc as in the reference BLAS. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex float acc = 0.0;
 	if (n > 0) {
 		/* with a negative stride the first logical element is the last stored one */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			acc += Cf(&x[ix]) * Cf(&y[iy]);
 		}
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over i of x(i) * y(i), for n
    elements of x and y taken with strides *incx_ and *incy_.
    n <= 0 yields 0.  A negative stride traverses the vector backwards,
    starting from element (1-n)*inc as in the reference BLAS. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
 	_Complex double acc = 0.0;
 	if (n > 0) {
 		/* with a negative stride the first logical element is the last stored one */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i = 0; i < n; i++, ix += incx, iy += incy) {
 			acc += Cd(&x[ix]) * Cd(&y[iy]);
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */
 /* The constant 1 as an addressable object: sgtsvx_ passes &c__1 as both
    increment arguments of its scopy_ calls, since the translated routines
    take every argument by pointer. */

 static integer c__1 = 1;

 /* > \brief <b> SGTSVX computes the solution to a system of linear equations A * X = B for GT matrices </b> */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGTSVX + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgtsvx.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgtsvx.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgtsvx.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, */
 /*                          DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, */
 /*                          WORK, IWORK, INFO ) */

 /*       CHARACTER          FACT, TRANS */
 /*       INTEGER            INFO, LDB, LDX, N, NRHS */
 /*       REAL               RCOND */
 /*       INTEGER            IPIV( * ), IWORK( * ) */
 /*       REAL               B( LDB, * ), BERR( * ), D( * ), DF( * ), */
 /*      $                   DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), */
 /*      $                   FERR( * ), WORK( * ), X( LDX, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGTSVX uses the LU factorization to compute the solution to a real */
 /* > system of linear equations A * X = B or A**T * X = B, */
 /* > where A is a tridiagonal matrix of order N and X and B are N-by-NRHS */
 /* > matrices. */
 /* > */
 /* > Error bounds on the solution and a condition estimate are also */
 /* > provided. */
 /* > \endverbatim */

 /* > \par Description: */
 /*  ================= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > The following steps are performed: */
 /* > */
 /* > 1. If FACT = 'N', the LU decomposition is used to factor the matrix A */
 /* >    as A = L * U, where L is a product of permutation and unit lower */
 /* >    bidiagonal matrices and U is upper triangular with nonzeros in */
 /* >    only the main diagonal and first two superdiagonals. */
 /* > */
 /* > 2. If some U(i,i)=0, so that U is exactly singular, then the routine */
 /* >    returns with INFO = i. Otherwise, the factored form of A is used */
 /* >    to estimate the condition number of the matrix A.  If the */
 /* >    reciprocal of the condition number is less than machine precision, */
 /* >    INFO = N+1 is returned as a warning, but the routine still goes on */
 /* >    to solve for X and compute error bounds as described below. */
 /* > */
 /* > 3. The system of equations is solved for X using the factored form */
 /* >    of A. */
 /* > */
 /* > 4. Iterative refinement is applied to improve the computed solution */
 /* >    matrix and calculate error bounds and backward error estimates */
 /* >    for it. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] FACT */
 /* > \verbatim */
 /* >          FACT is CHARACTER*1 */
 /* >          Specifies whether or not the factored form of A has been */
 /* >          supplied on entry. */
 /* >          = 'F':  DLF, DF, DUF, DU2, and IPIV contain the factored */
 /* >                  form of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV */
 /* >                  will not be modified. */
 /* >          = 'N':  The matrix will be copied to DLF, DF, and DUF */
 /* >                  and factored. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          Specifies the form of the system of equations: */
 /* >          = 'N':  A * X = B     (No transpose) */
 /* >          = 'T':  A**T * X = B  (Transpose) */
 /* >          = 'C':  A**H * X = B  (Conjugate transpose = Transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DL */
 /* > \verbatim */
 /* >          DL is REAL array, dimension (N-1) */
 /* >          The (n-1) subdiagonal elements of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] D */
 /* > \verbatim */
 /* >          D is REAL array, dimension (N) */
 /* >          The n diagonal elements of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU */
 /* > \verbatim */
 /* >          DU is REAL array, dimension (N-1) */
 /* >          The (n-1) superdiagonal elements of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DLF */
 /* > \verbatim */
 /* >          DLF is REAL array, dimension (N-1) */
 /* >          If FACT = 'F', then DLF is an input argument and on entry */
 /* >          contains the (n-1) multipliers that define the matrix L from */
 /* >          the LU factorization of A as computed by SGTTRF. */
 /* > */
 /* >          If FACT = 'N', then DLF is an output argument and on exit */
 /* >          contains the (n-1) multipliers that define the matrix L from */
 /* >          the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DF */
 /* > \verbatim */
 /* >          DF is REAL array, dimension (N) */
 /* >          If FACT = 'F', then DF is an input argument and on entry */
 /* >          contains the n diagonal elements of the upper triangular */
 /* >          matrix U from the LU factorization of A. */
 /* > */
 /* >          If FACT = 'N', then DF is an output argument and on exit */
 /* >          contains the n diagonal elements of the upper triangular */
 /* >          matrix U from the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DUF */
 /* > \verbatim */
 /* >          DUF is REAL array, dimension (N-1) */
 /* >          If FACT = 'F', then DUF is an input argument and on entry */
 /* >          contains the (n-1) elements of the first superdiagonal of U. */
 /* > */
 /* >          If FACT = 'N', then DUF is an output argument and on exit */
 /* >          contains the (n-1) elements of the first superdiagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DU2 */
 /* > \verbatim */
 /* >          DU2 is REAL array, dimension (N-2) */
 /* >          If FACT = 'F', then DU2 is an input argument and on entry */
 /* >          contains the (n-2) elements of the second superdiagonal of */
 /* >          U. */
 /* > */
 /* >          If FACT = 'N', then DU2 is an output argument and on exit */
 /* >          contains the (n-2) elements of the second superdiagonal of */
 /* >          U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          If FACT = 'F', then IPIV is an input argument and on entry */
 /* >          contains the pivot indices from the LU factorization of A as */
 /* >          computed by SGTTRF. */
 /* > */
 /* >          If FACT = 'N', then IPIV is an output argument and on exit */
 /* >          contains the pivot indices from the LU factorization of A; */
 /* >          row i of the matrix was interchanged with row IPIV(i). */
 /* >          IPIV(i) will always be either i or i+1; IPIV(i) = i indicates */
 /* >          a row interchange was not required. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,NRHS) */
 /* >          The N-by-NRHS right hand side matrix B. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] X */
 /* > \verbatim */
 /* >          X is REAL array, dimension (LDX,NRHS) */
 /* >          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDX */
 /* > \verbatim */
 /* >          LDX is INTEGER */
 /* >          The leading dimension of the array X.  LDX >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] RCOND */
 /* > \verbatim */
 /* >          RCOND is REAL */
 /* >          The estimate of the reciprocal condition number of the matrix */
 /* >          A.  If RCOND is less than the machine precision (in */
 /* >          particular, if RCOND = 0), the matrix is singular to working */
 /* >          precision.  This condition is indicated by a return code of */
 /* >          INFO > 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] FERR */
 /* > \verbatim */
 /* >          FERR is REAL array, dimension (NRHS) */
 /* >          The estimated forward error bound for each solution vector */
 /* >          X(j) (the j-th column of the solution matrix X). */
 /* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
 /* >          is an estimated upper bound for the magnitude of the largest */
 /* >          element in (X(j) - XTRUE) divided by the magnitude of the */
 /* >          largest element in X(j).  The estimate is as reliable as */
 /* >          the estimate for RCOND, and is almost always a slight */
 /* >          overestimate of the true error. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] BERR */
 /* > \verbatim */
 /* >          BERR is REAL array, dimension (NRHS) */
 /* >          The componentwise relative backward error of each solution */
 /* >          vector X(j) (i.e., the smallest relative change in */
 /* >          any element of A or B that makes X(j) an exact solution). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (3*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, and i is */
 /* >                <= N:  U(i,i) is exactly zero.  The factorization */
 /* >                       has not been completed unless i = N, but the */
 /* >                       factor U is exactly singular, so the solution */
 /* >                       and error bounds could not be computed. */
 /* >                       RCOND = 0 is returned. */
 /* >                = N+1: U is nonsingular, but RCOND is less than machine */
 /* >                       precision, meaning that the matrix is singular */
 /* >                       to working precision.  Nevertheless, the */
 /* >                       solution and error bounds are computed because */
 /* >                       there are a number of situations where the */
 /* >                       computed solution can be more accurate than the */
 /* >                       value of RCOND would suggest. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGTsolve */

 /*  ===================================================================== */
 /* Subroutine */ int sgtsvx_(char *fact, char *trans, integer *n, integer *
 	nrhs, real *dl, real *d__, real *du, real *dlf, real *df, real *duf, 
 	real *du2, integer *ipiv, real *b, integer *ldb, real *x, integer *
 	ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, 
 	integer *info)
 {
     /* Routines this driver delegates to */
     extern logical lsame_(char *, char *);
     extern real slamch_(char *);
     extern real slangt_(char *, integer *, real *, real *, real *);
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
     extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
 	    integer *);
     extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *,
 	    integer *, real *, integer *);
     extern /* Subroutine */ int sgtcon_(char *, integer *, real *, real *,
 	    real *, real *, integer *, real *, real *, real *, integer *,
 	    integer *);
     extern /* Subroutine */ int sgtrfs_(char *, integer *, integer *, real *,
 	    real *, real *, real *, real *, real *, real *, integer *, real *,
 	    integer *, real *, integer *, real *, real *, real *, integer *,
 	    integer *);
     extern /* Subroutine */ int sgttrf_(integer *, real *, real *, real *,
 	    real *, integer *, integer *);
     extern /* Subroutine */ int sgttrs_(char *, integer *, integer *, real *,
 	    real *, real *, real *, integer *, real *, integer *, integer *);

     /* Locals */
     logical factor_here, no_transpose;
     integer bad_arg, off_diag_len;
     real a_norm;
     char norm_kind;

 /*  -- LAPACK driver routine (version 3.7.0), December 2016 -- */
 /*  The array arguments are forwarded unchanged to the computational */
 /*  routines, so no 1-based pointer adjustment is needed here. */

     /* Validate the arguments; the first failure determines INFO. */
     *info = 0;
     factor_here = lsame_(fact, "N");
     no_transpose = lsame_(trans, "N");
     if (! (factor_here || lsame_(fact, "F"))) {
 	*info = -1;
     } else if (! (no_transpose || lsame_(trans, "T") || lsame_(trans, "C"))) {
 	*info = -2;
     } else if (*n < 0) {
 	*info = -3;
     } else if (*nrhs < 0) {
 	*info = -4;
     } else if (*ldb < 1 || *ldb < *n) {
 	*info = -14;
     } else if (*ldx < 1 || *ldx < *n) {
 	*info = -16;
     }
     if (*info != 0) {
 	bad_arg = -(*info);
 	xerbla_("SGTSVX", &bad_arg, (ftnlen)6);
 	return 0;
     }

     if (factor_here) {

 /*        Copy A into DLF, DF, DUF and compute its LU factorization. */

 	scopy_(n, d__, &c__1, df, &c__1);
 	if (*n > 1) {
 	    off_diag_len = *n - 1;
 	    scopy_(&off_diag_len, dl, &c__1, dlf, &c__1);
 	    off_diag_len = *n - 1;
 	    scopy_(&off_diag_len, du, &c__1, duf, &c__1);
 	}
 	sgttrf_(n, dlf, df, duf, du2, ipiv, info);

 /*        A positive INFO from the factorization ends the routine */
 /*        with RCOND = 0. */

 	if (*info > 0) {
 	    *rcond = 0.f;
 	    return 0;
 	}
     }

 /*     Norm of A: the 1-norm for the untransposed system, the */
 /*     infinity-norm otherwise. */

     norm_kind = no_transpose ? '1' : 'I';
     a_norm = slangt_(&norm_kind, n, dl, d__, du);

 /*     Reciprocal condition number estimate from the factored form. */

     sgtcon_(&norm_kind, n, dlf, df, duf, du2, ipiv, &a_norm, rcond, work,
 	    iwork, info);

 /*     Solve: X starts as a copy of B and is overwritten by the solution. */

     slacpy_("Full", n, nrhs, b, ldb, x, ldx);
     sgttrs_(trans, n, nrhs, dlf, df, duf, du2, ipiv, x, ldx, info);

 /*     Iterative refinement plus forward/backward error bounds. */

     sgtrfs_(trans, n, nrhs, dl, d__, du, dlf, df, duf, du2, ipiv, b, ldb, x,
 	    ldx, ferr, berr, work, iwork, info);

 /*     Report INFO = N+1 when A is singular to working precision. */

     if (*rcond < slamch_("Epsilon")) {
 	*info = *n + 1;
     }

     return 0;

 /*     End of SGTSVX */

 } /* sgtsvx_ */

--- a/lapack-netlib/SRC/sgttrf.c
+++ b/lapack-netlib/SRC/sgttrf.c
@@ -0,0 +1,632 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 /* <complex.h> provides `complex` and `I` as macros; remove them so the
    f2c struct type named `complex` can be declared below. */
 #if defined(complex)
 #undef complex
 #endif
 #if defined(I)
 #undef I
 #endif

 /* Scalar types used by f2c-translated Fortran. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;

 /* Complex numbers stored as (real, imaginary) pairs. */
 typedef struct {
 	real r;
 	real i;
 } complex;
 typedef struct {
 	doublereal r;
 	doublereal i;
 } doublecomplex;

 /* Cf/Cd: read an f2c complex struct as a C99 complex value. */
 static inline _Complex float Cf(complex *z)
 {
 	return z->r + z->i*_Complex_I;
 }
 static inline _Complex double Cd(doublecomplex *z)
 {
 	return z->r + z->i*_Complex_I;
 }

 /* _pCf/_pCd: reinterpret the struct's address as a pointer to a C99 complex. */
 static inline _Complex float * _pCf(complex *z)
 {
 	return (_Complex float*)z;
 }
 static inline _Complex double * _pCd(doublecomplex *z)
 {
 	return (_Complex double*)z;
 }

 /* pCf/pCd: lvalue view of an f2c complex struct as a C99 complex. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))

 /* Fortran LOGICAL and small-integer types. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #if !defined(Extern)
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer types used by the f2c I/O descriptor records below. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* external read, write */
 typedef struct {
 	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /* internal read, write */
 typedef struct {
 	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /* open */
 typedef struct {
 	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /* close */
 typedef struct {
 	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /* rewind, backspace, endfile */
 typedef struct {
 	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire; the pointer/length members follow the standard's parameter order */
 typedef struct {
 	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint *inex;
 	ftnint *inopen;
 	ftnint *innum;
 	ftnint *innamed;
 	char *inname;
 	ftnlen innamlen;
 	char *inacc;
 	ftnlen inacclen;
 	char *inseq;
 	ftnlen inseqlen;
 	char *indir;
 	ftnlen indirlen;
 	char *infmt;
 	ftnlen infmtlen;
 	char *inform;
 	ftnint informlen;
 	char *inunf;
 	ftnlen inunflen;
 	ftnint *inrecl;
 	ftnint *innrec;
 	char *inblank;
 	ftnlen inblanklen;
 } inlist;

 /* Spelling of the void type used by f2c-generated declarations. */
 #define VOID void

 /* Storage able to hold any one Fortran scalar (for multiple entry points). */
 typedef union Multitype {
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 } Multitype;

 /* One variable entry of a Namelist. */
 typedef struct Vardesc {
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int type;
 } Vardesc;

 /* A named list of nvars variable descriptors. */
 typedef struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 } Namelist;

 /* Arithmetic and bit helpers.  These are plain macros: an argument may be
    evaluated more than once, so avoid arguments with side effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* f2c runtime entry points mapped onto <math.h>/<complex.h>.  Most take
    pointers to their operands and dereference them; the complex ones go
    through Cf/Cd (read) and pCf/pCd (write). */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Integer powers are forwarded to the *pow_ui helpers. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* s_cat: concatenate *np source strings (rpp[i], length rnp[i]) into lpp,
    truncating at llp characters and filling the remainder with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the shorter of the two given lengths. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copy at most min(C,D) characters, stopping early at a NUL in B;
    the destination is not padded. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* sig_die and s_stop ignore their arguments and terminate the process. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Version tag string. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Statement shims for Fortran EXIT / CYCLE; the trailing ';' is part of
    the expansion. */
 #define myexit_() break;
 #define mycycle() continue;
 /* Intrinsic shims.  These expand to parenthesised expressions so they can
    be used anywhere a value is expected; the former brace-wrapped
    expansions ("{ceil(w)}") were only legal as a declaration initializer.
    myhuge ignores its argument. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 /* mymaxloc: s and e are passed as pointers and dereferenced here. */
 #define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning logical.  The parameter list is
    left unspecified in C and is variadic when compiled as C++. */
 #ifndef __cplusplus
 typedef logical (*L_fp)();
 #else
 typedef logical (*L_fp)(...);
 #endif

 /* spow_ui: return x raised to the integer power n, in single precision,
    by binary exponentiation.  A negative n yields (1/x)**|n|; n == 0
    yields 1. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int u;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: "-n" overflows for the most
 		   negative integer. */
 		u = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* this bit of the exponent is set */
 		if (u >>= 1) x *= x;		/* square the base for the next bit */
 		else break;
 	}
 	return result;
 }
 /* dpow_ui: return x raised to the integer power n, in double precision,
    by binary exponentiation.  A negative n yields (1/x)**|n|; n == 0
    yields 1. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int u;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: "-n" overflows for the most
 		   negative integer. */
 		u = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* this bit of the exponent is set */
 		if (u >>= 1) x *= x;		/* square the base for the next bit */
 		else break;
 	}
 	return result;
 }
 /* cpow_ui: return the single-precision complex x raised to the integer
    power n by binary exponentiation.  A negative n yields (1/x)**|n|;
    n == 0 yields 1. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int u;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: "-n" overflows for the most
 		   negative integer. */
 		u = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* this bit of the exponent is set */
 		if (u >>= 1) x *= x;		/* square the base for the next bit */
 		else break;
 	}
 	return result;
 }
 /* zpow_ui: return the double-precision complex x raised to the integer
    power n by binary exponentiation.  A negative n yields (1/x)**|n|;
    n == 0 yields 1. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int u;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: "-n" overflows for the most
 		   negative integer. */
 		u = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* this bit of the exponent is set */
 		if (u >>= 1) x *= x;		/* square the base for the next bit */
 		else break;
 	}
 	return result;
 }
 /* pow_ii: integer x raised to the integer power n.
    - n == 0 or x == 1 gives 1.
    - n < 0 with |x| > 1 gives 0 (the true result is a fraction).
    - n < 0 with x == 0 evaluates 1/x, exactly as before (NOTE(review):
      presumably kept so the division by zero is not silently hidden).
    - x == -1 with n < 0 depends only on the parity of n.
    Otherwise the power is formed by binary exponentiation. */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int u;

 	if (n <= 0) {
 		if (n == 0 || x == 1)
 			return 1;
 		if (x != -1)
 			return x == 0 ? 1/x : 0;
 		/* x == -1: use |n|, taken in unsigned arithmetic because "-n"
 		   overflows for the most negative integer. */
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* this bit of the exponent is set */
 		if (u >>= 1) x *= x;		/* square the base for the next bit */
 		else break;
 	}
 	return result;
 }
 /* dmaxloc_: scan w over the 1-based index range s..e and return the
    position, counted from 1 relative to s, of the first largest element.
    The parameter n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double best_val = w[s-1];
 	integer k;

 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > best_val) {
 			best = k;
 			best_val = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* smaxloc_: scan w over the 1-based index range s..e and return the
    position, counted from 1 relative to s, of the first largest element.
    The parameter n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float best_val = w[s-1];
 	integer k;

 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > best_val) {
 			best = k;
 			best_val = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* cdotc_: conjugated dot product, *z = sum over i of conj(x(i)) * y(i),
    for n single-precision complex elements taken with strides incx/incy.
    Stores 0 when n <= 0.  With a negative stride the traversal starts at
    offset (1-n)*inc, as in reference BLAS, so every access stays inside
    the n-element vector. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float acc = 0.0;

 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /* zdotc_: conjugated dot product, *z = sum over i of conj(x(i)) * y(i),
    for n double-precision complex elements taken with strides incx/incy.
    Stores 0 when n <= 0.  With a negative stride the traversal starts at
    offset (1-n)*inc, as in reference BLAS, so every access stays inside
    the n-element vector. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double acc = 0.0;

 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 /* cdotu_: unconjugated dot product, *z = sum over i of x(i) * y(i), for
    n single-precision complex elements taken with strides incx/incy.
    Stores 0 when n <= 0.  With a negative stride the traversal starts at
    offset (1-n)*inc, as in reference BLAS, so every access stays inside
    the n-element vector. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float acc = 0.0;

 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /* zdotu_: unconjugated dot product, *z = sum over i of x(i) * y(i), for
    n double-precision complex elements taken with strides incx/incy.
    Stores 0 when n <= 0.  With a negative stride the traversal starts at
    offset (1-n)*inc, as in reference BLAS, so every access stays inside
    the n-element vector. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double acc = 0.0;

 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SGTTRF */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGTTRF + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgttrf.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgttrf.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgttrf.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) */

 /*       INTEGER            INFO, N */
 /*       INTEGER            IPIV( * ) */
 /*       REAL               D( * ), DL( * ), DU( * ), DU2( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGTTRF computes an LU factorization of a real tridiagonal matrix A */
 /* > using elimination with partial pivoting and row interchanges. */
 /* > */
 /* > The factorization has the form */
 /* >    A = L * U */
 /* > where L is a product of permutation and unit lower bidiagonal */
 /* > matrices and U is upper triangular with nonzeros in only the main */
 /* > diagonal and first two superdiagonals. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DL */
 /* > \verbatim */
 /* >          DL is REAL array, dimension (N-1) */
 /* >          On entry, DL must contain the (n-1) sub-diagonal elements of */
 /* >          A. */
 /* > */
 /* >          On exit, DL is overwritten by the (n-1) multipliers that */
 /* >          define the matrix L from the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] D */
 /* > \verbatim */
 /* >          D is REAL array, dimension (N) */
 /* >          On entry, D must contain the diagonal elements of A. */
 /* > */
 /* >          On exit, D is overwritten by the n diagonal elements of the */
 /* >          upper triangular matrix U from the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] DU */
 /* > \verbatim */
 /* >          DU is REAL array, dimension (N-1) */
 /* >          On entry, DU must contain the (n-1) super-diagonal elements */
 /* >          of A. */
 /* > */
 /* >          On exit, DU is overwritten by the (n-1) elements of the first */
 /* >          super-diagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] DU2 */
 /* > \verbatim */
 /* >          DU2 is REAL array, dimension (N-2) */
 /* >          On exit, DU2 is overwritten by the (n-2) elements of the */
 /* >          second super-diagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices; for 1 <= i <= n, row i of the matrix was */
 /* >          interchanged with row IPIV(i).  IPIV(i) will always be either */
 /* >          i or i+1; IPIV(i) = i indicates a row interchange was not */
 /* >          required. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -k, the k-th argument had an illegal value */
 /* >          > 0:  if INFO = k, U(k,k) is exactly zero. The factorization */
 /* >                has been completed, but the factor U is exactly */
 /* >                singular, and division by zero will occur if it is used */
 /* >                to solve a system of equations. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGTcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int sgttrf_(integer *n, real *dl, real *d__, real *du, real *
 	du2, integer *ipiv, integer *info)
 {
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);

    /* Local variables */
    integer k, bound, last, errarg;
    real mult, saved_du;

 /*  LU factorization of a real tridiagonal matrix by elimination with */
 /*  partial pivoting (LAPACK computational routine, version 3.7.0). */
 /*  On exit *info is 0, -1 when N is negative, or the index of the */
 /*  first exactly-zero diagonal entry of U. */

    /* Shift every array so that it can be indexed from 1 */
    --ipiv;
    --du2;
    --du;
    --d__;
    --dl;

    *info = 0;
    if (*n < 0) {
 	*info = -1;
 	errarg = -(*info);
 	xerbla_("SGTTRF", &errarg, (ftnlen)6);
 	return 0;
    }

 /*     Nothing to factor for an empty matrix */

    if (*n == 0) {
 	return 0;
    }

 /*     Start from the identity permutation and a zero second */
 /*     super-diagonal */

    bound = *n;
    for (k = 1; k <= bound; ++k) {
 	ipiv[k] = k;
    }
    bound = *n - 2;
    for (k = 1; k <= bound; ++k) {
 	du2[k] = 0.f;
    }

 /*     Eliminate DL(k) for k = 1..N-1.  The last step (k = N-1) has no */
 /*     column k+2, so the DU2 / DU(k+1) fill-in update is skipped there. */

    last = *n - 1;
    for (k = 1; k <= last; ++k) {
 	if (abs(d__[k]) >= abs(dl[k])) {

 /*           Diagonal entry is the pivot: no row interchange */

 	    if (d__[k] != 0.f) {
 		mult = dl[k] / d__[k];
 		dl[k] = mult;
 		d__[k + 1] -= mult * du[k];
 	    }
 	    continue;
 	}

 /*        Sub-diagonal entry is the pivot: interchange rows k and k+1 */

 	mult = d__[k] / dl[k];
 	d__[k] = dl[k];
 	dl[k] = mult;
 	saved_du = du[k];
 	du[k] = d__[k + 1];
 	d__[k + 1] = saved_du - mult * d__[k + 1];
 	if (k < last) {
 	    du2[k] = du[k + 1];
 	    du[k + 1] = -mult * du[k + 1];
 	}
 	ipiv[k] = k + 1;
    }

 /*     Report the first zero on the diagonal of U, if any */

    bound = *n;
    for (k = 1; k <= bound; ++k) {
 	if (d__[k] == 0.f) {
 	    *info = k;
 	    break;
 	}
    }

    return 0;

 /*     End of SGTTRF */

 } /* sgttrf_ */

--- a/lapack-netlib/SRC/sgttrs.c
+++ b/lapack-netlib/SRC/sgttrs.c
@@ -0,0 +1,632 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 /* <complex.h> provides `complex` and `I` as macros; remove them so the
    f2c struct type named `complex` can be declared below. */
 #if defined(complex)
 #undef complex
 #endif
 #if defined(I)
 #undef I
 #endif

 /* Scalar types used by f2c-translated Fortran. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;

 /* Complex numbers stored as (real, imaginary) pairs. */
 typedef struct {
 	real r;
 	real i;
 } complex;
 typedef struct {
 	doublereal r;
 	doublereal i;
 } doublecomplex;

 /* Cf/Cd: read an f2c complex struct as a C99 complex value. */
 static inline _Complex float Cf(complex *z)
 {
 	return z->r + z->i*_Complex_I;
 }
 static inline _Complex double Cd(doublecomplex *z)
 {
 	return z->r + z->i*_Complex_I;
 }

 /* _pCf/_pCd: reinterpret the struct's address as a pointer to a C99 complex. */
 static inline _Complex float * _pCf(complex *z)
 {
 	return (_Complex float*)z;
 }
 static inline _Complex double * _pCd(doublecomplex *z)
 {
 	return (_Complex double*)z;
 }

 /* pCf/pCd: lvalue view of an f2c complex struct as a C99 complex. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))

 /* Fortran LOGICAL and small-integer types. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #if !defined(Extern)
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer types used by the f2c I/O descriptor records below. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* external read, write */
 typedef struct {
 	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /* internal read, write */
 typedef struct {
 	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /* open */
 typedef struct {
 	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /* close */
 typedef struct {
 	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /* rewind, backspace, endfile */
 typedef struct {
 	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire; the pointer/length members follow the standard's parameter order */
 typedef struct {
 	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint *inex;
 	ftnint *inopen;
 	ftnint *innum;
 	ftnint *innamed;
 	char *inname;
 	ftnlen innamlen;
 	char *inacc;
 	ftnlen inacclen;
 	char *inseq;
 	ftnlen inseqlen;
 	char *indir;
 	ftnlen indirlen;
 	char *infmt;
 	ftnlen infmtlen;
 	char *inform;
 	ftnint informlen;
 	char *inunf;
 	ftnlen inunflen;
 	ftnint *inrecl;
 	ftnint *innrec;
 	char *inblank;
 	ftnlen inblanklen;
 } inlist;

 /* Spelling of the void type used by f2c-generated declarations. */
 #define VOID void

 /* Storage able to hold any one Fortran scalar (for multiple entry points). */
 typedef union Multitype {
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 } Multitype;

 /* One variable entry of a Namelist. */
 typedef struct Vardesc {
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int type;
 } Vardesc;

 /* A named list of nvars variable descriptors. */
 typedef struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 } Namelist;

 /* Arithmetic and bit helpers.  These are plain macros: an argument may be
    evaluated more than once, so avoid arguments with side effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* f2c runtime entry points mapped onto <math.h>/<complex.h>.  Most take
    pointers to their operands and dereference them; the complex ones go
    through Cf/Cd (read) and pCf/pCd (write). */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Integer powers are forwarded to the *pow_ui helpers. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* s_cat: concatenate *np source strings (rpp[i], length rnp[i]) into lpp,
    truncating at llp characters and filling the remainder with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the shorter of the two given lengths. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copy at most min(C,D) characters, stopping early at a NUL in B;
    the destination is not padded. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* sig_die and s_stop ignore their arguments and terminate the process. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Version tag string. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Statement shims for Fortran EXIT / CYCLE; the trailing ';' is part of
    the expansion. */
 #define myexit_() break;
 #define mycycle() continue;
 /* Intrinsic shims.  These expand to parenthesised expressions so they can
    be used anywhere a value is expected; the former brace-wrapped
    expansions ("{ceil(w)}") were only legal as a declaration initializer.
    myhuge ignores its argument. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 /* mymaxloc: s and e are passed as pointers and dereferenced here. */
 #define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning logical.  The parameter list is
    left unspecified in C and is variadic when compiled as C++. */
 #ifndef __cplusplus
 typedef logical (*L_fp)();
 #else
 typedef logical (*L_fp)(...);
 #endif

 /* spow_ui: return x raised to the integer power n, in single precision,
    by binary exponentiation.  A negative n yields (1/x)**|n|; n == 0
    yields 1. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int u;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: "-n" overflows for the most
 		   negative integer. */
 		u = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* this bit of the exponent is set */
 		if (u >>= 1) x *= x;		/* square the base for the next bit */
 		else break;
 	}
 	return result;
 }
 /* dpow_ui: return x raised to the integer power n, in double precision,
    by binary exponentiation.  A negative n yields (1/x)**|n|; n == 0
    yields 1. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int u;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: "-n" overflows for the most
 		   negative integer. */
 		u = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* this bit of the exponent is set */
 		if (u >>= 1) x *= x;		/* square the base for the next bit */
 		else break;
 	}
 	return result;
 }
 /* cpow_ui: return the single-precision complex x raised to the integer
    power n by binary exponentiation.  A negative n yields (1/x)**|n|;
    n == 0 yields 1. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int u;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: "-n" overflows for the most
 		   negative integer. */
 		u = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* this bit of the exponent is set */
 		if (u >>= 1) x *= x;		/* square the base for the next bit */
 		else break;
 	}
 	return result;
 }
 /* zpow_ui: return the double-precision complex x raised to the integer
    power n by binary exponentiation.  A negative n yields (1/x)**|n|;
    n == 0 yields 1. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int u;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: "-n" overflows for the most
 		   negative integer. */
 		u = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* this bit of the exponent is set */
 		if (u >>= 1) x *= x;		/* square the base for the next bit */
 		else break;
 	}
 	return result;
 }
 /* pow_ii: integer x raised to the integer power n.
    - n == 0 or x == 1 gives 1.
    - n < 0 with |x| > 1 gives 0 (the true result is a fraction).
    - n < 0 with x == 0 evaluates 1/x, exactly as before (NOTE(review):
      presumably kept so the division by zero is not silently hidden).
    - x == -1 with n < 0 depends only on the parity of n.
    Otherwise the power is formed by binary exponentiation. */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int u;

 	if (n <= 0) {
 		if (n == 0 || x == 1)
 			return 1;
 		if (x != -1)
 			return x == 0 ? 1/x : 0;
 		/* x == -1: use |n|, taken in unsigned arithmetic because "-n"
 		   overflows for the most negative integer. */
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* this bit of the exponent is set */
 		if (u >>= 1) x *= x;		/* square the base for the next bit */
 		else break;
 	}
 	return result;
 }
 /* dmaxloc_: scan w over the 1-based index range s..e and return the
    position, counted from 1 relative to s, of the first largest element.
    The parameter n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double best_val = w[s-1];
 	integer k;

 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > best_val) {
 			best = k;
 			best_val = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* smaxloc_: scan w over the 1-based index range s..e and return the
    position, counted from 1 relative to s, of the first largest element.
    The parameter n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float best_val = w[s-1];
 	integer k;

 	for (k = s + 1; k <= e; k++) {
 		if (w[k-1] > best_val) {
 			best = k;
 			best_val = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* cdotc_: conjugated dot product, *z = sum over i of conj(x(i)) * y(i),
    for n single-precision complex elements taken with strides incx/incy.
    Stores 0 when n <= 0.  With a negative stride the traversal starts at
    offset (1-n)*inc, as in reference BLAS, so every access stays inside
    the n-element vector. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float acc = 0.0;

 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /* zdotc_: conjugated dot product, *z = sum over i of conj(x(i)) * y(i),
    for n double-precision complex elements taken with strides incx/incy.
    Stores 0 when n <= 0.  With a negative stride the traversal starts at
    offset (1-n)*inc, as in reference BLAS, so every access stays inside
    the n-element vector. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double acc = 0.0;

 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 /* cdotu_: unconjugated dot product, *z = sum over i of x(i) * y(i), for
    n single-precision complex elements taken with strides incx/incy.
    Stores 0 when n <= 0.  With a negative stride the traversal starts at
    offset (1-n)*inc, as in reference BLAS, so every access stays inside
    the n-element vector. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float acc = 0.0;

 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /* zdotu_: unconjugated dot product, *z = sum over i of x(i) * y(i), for
    n double-precision complex elements taken with strides incx/incy.
    Stores 0 when n <= 0.  With a negative stride the traversal starts at
    offset (1-n)*inc, as in reference BLAS, so every access stays inside
    the n-element vector. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double acc = 0.0;

 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* Integer constants 1 and -1, kept in storage because the translated
    Fortran calls pass every argument by address. */
 static integer c__1 = 1, c_n1 = -1;

 /* > \brief \b SGTTRS */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGTTRS + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgttrs.
 f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgttrs.
 f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgttrs.
 f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, */
 /*                          INFO ) */

 /*       CHARACTER          TRANS */
 /*       INTEGER            INFO, LDB, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       REAL               B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGTTRS solves one of the systems of equations */
 /* >    A*X = B  or  A**T*X = B, */
 /* > with a tridiagonal matrix A using the LU factorization computed */
 /* > by SGTTRF. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >          Specifies the form of the system of equations. */
 /* >          = 'N':  A * X = B  (No transpose) */
 /* >          = 'T':  A**T* X = B  (Transpose) */
 /* >          = 'C':  A**T* X = B  (Conjugate transpose = Transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DL */
 /* > \verbatim */
 /* >          DL is REAL array, dimension (N-1) */
 /* >          The (n-1) multipliers that define the matrix L from the */
 /* >          LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] D */
 /* > \verbatim */
 /* >          D is REAL array, dimension (N) */
 /* >          The n diagonal elements of the upper triangular matrix U from */
 /* >          the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU */
 /* > \verbatim */
 /* >          DU is REAL array, dimension (N-1) */
 /* >          The (n-1) elements of the first super-diagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU2 */
 /* > \verbatim */
 /* >          DU2 is REAL array, dimension (N-2) */
 /* >          The (n-2) elements of the second super-diagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices; for 1 <= i <= n, row i of the matrix was */
 /* >          interchanged with row IPIV(i).  IPIV(i) will always be either */
 /* >          i or i+1; IPIV(i) = i indicates a row interchange was not */
 /* >          required. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,NRHS) */
 /* >          On entry, the matrix of right hand side vectors B. */
 /* >          On exit, B is overwritten by the solution vectors X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= max(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGTcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int sgttrs_(char *trans, integer *n, integer *nrhs, real *dl,
 	 real *d__, real *du, real *du2, integer *ipiv, real *b, integer *ldb,
 	 integer *info)
 {
    extern /* Subroutine */ int sgtts2_(integer *, integer *, integer *, real 
 	    *, real *, real *, real *, integer *, real *, integer *), xerbla_(
 	    char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
 	    integer *, integer *, ftnlen, ftnlen);

    /* Local variables */
    integer b_dim1, b_offset;
    integer itrans, nb, tuned, col, width, remaining, total, errarg;
    logical notran, is_trans, is_conj;
    unsigned char op;

 /*  Solves A*X = B or A**T*X = B for a tridiagonal A using the LU */
 /*  factors from SGTTRF (LAPACK computational routine, version 3.7.0). */
 /*  The right-hand sides are handed to SGTTS2 in column blocks of the */
 /*  width suggested by ILAENV. */

    /* Shift every array so that it can be indexed from 1 */
    --dl;
    --d__;
    --du;
    --du2;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;

    /* Validate the arguments; *info = -i flags the i-th one */
    *info = 0;
    op = *(unsigned char *)trans;
    notran = op == 'N' || op == 'n';
    is_trans = op == 'T' || op == 't';
    is_conj = op == 'C' || op == 'c';
    if (! (notran || is_trans || is_conj)) {
 	*info = -1;
    } else if (*n < 0) {
 	*info = -2;
    } else if (*nrhs < 0) {
 	*info = -3;
    } else if (*ldb < f2cmax(*n,1)) {
 	*info = -10;
    }
    if (*info != 0) {
 	errarg = -(*info);
 	xerbla_("SGTTRS", &errarg, (ftnlen)6);
 	return 0;
    }

 /*     Nothing to do for an empty system */

    if (*n == 0 || *nrhs == 0) {
 	return 0;
    }

 /*     SGTTS2 takes 0 for "no transpose" and 1 otherwise */

    itrans = notran ? 0 : 1;

 /*     Choose how many right-hand sides to solve per call */

    if (*nrhs == 1) {
 	nb = 1;
    } else {
 	tuned = ilaenv_(&c__1, "SGTTRS", trans, n, nrhs, &c_n1, &
 		c_n1, (ftnlen)6, (ftnlen)1);
 	nb = f2cmax(1,tuned);
    }

    if (nb >= *nrhs) {

 /*        One block covers all right-hand sides */

 	sgtts2_(&itrans, n, nrhs, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[1], 
 		&b[b_offset], ldb);
 	return 0;
    }

 /*     Walk the columns of B in blocks of at most nb (nb >= 1 here) */

    total = *nrhs;
    for (col = 1; col <= total; col += nb) {
 	remaining = *nrhs - col + 1;
 	width = f2cmin(remaining,nb);
 	sgtts2_(&itrans, n, &width, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[
 		1], &b[col * b_dim1 + 1], ldb);
    }

 /*     End of SGTTRS */

    return 0;
 } /* sgttrs_ */

--- a/lapack-netlib/SRC/sgtts2.c
+++ b/lapack-netlib/SRC/sgtts2.c
@@ -0,0 +1,706 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Assemble a C99 complex float from the r and i fields of the struct z points to. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 /* Double-precision counterpart of Cf. */
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* Cast a struct pointer to a C99 complex pointer; the pCf macro dereferences the
    result so the struct can be assigned a complex value.  Assumes the two-field
    struct and the C99 complex type share the same layout. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 /* Double-precision counterpart of _pCf (used by the pCd macro). */
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* Complex ** complex power: R, A and B are all doublecomplex pointers, so the
    exponent has to go through Cd() just like the base before reaching cpow(). */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Computes x**n for an integer exponent by square-and-multiply.
    x: base.  n: exponent; when negative, the reciprocal of x is raised to |n|.
    Returns 1 when n is 0. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		if (!(bits >>= 1)) break;
 		x *= x;
 	}
 	return result;
 }
 /* Computes x**n for an integer exponent by square-and-multiply.
    x: base.  n: exponent; when negative, the reciprocal of x is raised to |n|.
    Returns 1 when n is 0. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		if (!(bits >>= 1)) break;
 		x *= x;
 	}
 	return result;
 }
 /* Computes the complex value x**n for an integer exponent by square-and-multiply.
    x: base.  n: exponent; when negative, the reciprocal of x is raised to |n|.
    Returns 1 when n is 0. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		if (!(bits >>= 1)) break;
 		x *= x;
 	}
 	return result;
 }
 /* Computes the complex value x**n for an integer exponent by square-and-multiply.
    x: base.  n: exponent; when negative, the reciprocal of x is raised to |n|.
    Returns 1 when n is 0. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		if (!(bits >>= 1)) break;
 		x *= x;
 	}
 	return result;
 }
 /* Integer power x**n with an integer result.
    n == 0 yields 1.  For n < 0 the result is 1 when x is 1, (-1)**|n| when x is -1
    and 0 for every other nonzero x.  x == 0 with n < 0 still evaluates 1/x, as the
    code always did (NOTE(review): presumably deliberate, so the invalid operation
    is not silently turned into a value -- confirm before changing). */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int bits;
 	if (n == 0) return 1;
 	if (n < 0) {
 		if (x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* Only x == -1 gets here.  Take |n| in unsigned arithmetic:
 		   negating the most negative integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int)n;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		if (!(bits >>= 1)) break;
 		x *= x;
 	}
 	return result;
 }
 /* Locates the largest entry among w[s-1] .. w[e-1] (s and e are 1-based bounds).
    Returns its position counted from s, starting at 1; on ties the earliest entry
    wins because only a strictly larger value replaces the current one.
    The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double top = w[s-1];
 	integer k;
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Locates the largest entry among w[s-1] .. w[e-1] (s and e are 1-based bounds).
    Returns its position counted from s, starting at 1; on ties the earliest entry
    wins because only a strictly larger value replaces the current one.
    The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float top = w[s-1];
 	integer k;
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: stores sum_k conj(x_k) * y_k in *z.
    n_: number of terms (a value <= 0 stores zero).
    x, incx_ / y, incy_: vectors and their strides.  A negative stride walks the
    vector backwards starting from its far end, as the reference BLAS does. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	/* Start offsets: 0 for forward strides, (1-n)*inc for backward ones so that
 	   no element in front of the array is touched. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	integer k;
 	_Complex float acc = 0.0;
 	for (k = 0; k < n; ++k, ix += incx, iy += incy) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product: stores sum_k conj(x_k) * y_k in *z.
    n_: number of terms (a value <= 0 stores zero).
    x, incx_ / y, incy_: vectors and their strides.  A negative stride walks the
    vector backwards starting from its far end, as the reference BLAS does. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	/* Start offsets: 0 for forward strides, (1-n)*inc for backward ones so that
 	   no element in front of the array is touched. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	integer k;
 	_Complex double acc = 0.0;
 	for (k = 0; k < n; ++k, ix += incx, iy += incy) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 	}
 	pCd(z) = acc;
 }
 /* Unconjugated dot product: stores sum_k x_k * y_k in *z.
    n_: number of terms (a value <= 0 stores zero).
    x, incx_ / y, incy_: vectors and their strides.  A negative stride walks the
    vector backwards starting from its far end, as the reference BLAS does. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	/* Start offsets: 0 for forward strides, (1-n)*inc for backward ones so that
 	   no element in front of the array is touched. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	integer k;
 	_Complex float acc = 0.0;
 	for (k = 0; k < n; ++k, ix += incx, iy += incy) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product: stores sum_k x_k * y_k in *z.
    n_: number of terms (a value <= 0 stores zero).
    x, incx_ / y, incy_: vectors and their strides.  A negative stride walks the
    vector backwards starting from its far end, as the reference BLAS does. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	/* Start offsets: 0 for forward strides, (1-n)*inc for backward ones so that
 	   no element in front of the array is touched. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	integer k;
 	_Complex double acc = 0.0;
 	for (k = 0; k < n; ++k, ix += incx, iy += incy) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SGTTS2 solves a system of linear equations with a tridiagonal matrix using the LU factorization computed by sgttrf. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SGTTS2 + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgtts2.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgtts2.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgtts2.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) */

 /*       INTEGER            ITRANS, LDB, N, NRHS */
 /*       INTEGER            IPIV( * ) */
 /*       REAL               B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SGTTS2 solves one of the systems of equations */
 /* >    A*X = B  or  A**T*X = B, */
 /* > with a tridiagonal matrix A using the LU factorization computed */
 /* > by SGTTRF. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] ITRANS */
 /* > \verbatim */
 /* >          ITRANS is INTEGER */
 /* >          Specifies the form of the system of equations. */
 /* >          = 0:  A * X = B  (No transpose) */
 /* >          = 1:  A**T* X = B  (Transpose) */
 /* >          = 2:  A**T* X = B  (Conjugate transpose = Transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NRHS */
 /* > \verbatim */
 /* >          NRHS is INTEGER */
 /* >          The number of right hand sides, i.e., the number of columns */
 /* >          of the matrix B.  NRHS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DL */
 /* > \verbatim */
 /* >          DL is REAL array, dimension (N-1) */
 /* >          The (n-1) multipliers that define the matrix L from the */
 /* >          LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] D */
 /* > \verbatim */
 /* >          D is REAL array, dimension (N) */
 /* >          The n diagonal elements of the upper triangular matrix U from */
 /* >          the LU factorization of A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU */
 /* > \verbatim */
 /* >          DU is REAL array, dimension (N-1) */
 /* >          The (n-1) elements of the first super-diagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] DU2 */
 /* > \verbatim */
 /* >          DU2 is REAL array, dimension (N-2) */
 /* >          The (n-2) elements of the second super-diagonal of U. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >          The pivot indices; for 1 <= i <= n, row i of the matrix was */
 /* >          interchanged with row IPIV(i).  IPIV(i) will always be either */
 /* >          i or i+1; IPIV(i) = i indicates a row interchange was not */
 /* >          required. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] B */
 /* > \verbatim */
 /* >          B is REAL array, dimension (LDB,NRHS) */
 /* >          On entry, the matrix of right hand side vectors B. */
 /* >          On exit, B is overwritten by the solution vectors X. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDB */
 /* > \verbatim */
 /* >          LDB is INTEGER */
 /* >          The leading dimension of the array B.  LDB >= max(1,N). */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGTcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int sgtts2_(integer *itrans, integer *n, integer *nrhs, real 
 	*dl, real *d__, real *du, real *du2, integer *ipiv, real *b, integer *
 	ldb)
 {
     /* Scalars read once from the by-reference arguments. */
     const integer order = *n;
     const integer ncols = *nrhs;
     const integer stride = *ldb;
     /* With at most one right-hand side the L sweeps index rows through the pivot */
     /* value itself and only column 1 is processed; otherwise every column is */
     /* processed with sweeps that branch on whether a row interchange took place. */
     const integer one_rhs = ncols <= 1;
     const integer last = one_rhs ? 1 : ncols;
     real *col;
     real held;
     integer row, j, piv;
 /*     Quick return if possible */
     if (order == 0 || ncols == 0) {
         return 0;
     }
 /*     Shift the arrays so that 1-based subscripts address them directly */
     --dl;
     --d__;
     --du;
     --du2;
     --ipiv;
     b -= 1 + stride;
     if (*itrans == 0) {
 /*        Solve A*X = B using the LU factorization of A, */
 /*        overwriting each right hand side vector with its solution. */
         for (j = 1; j <= last; ++j) {
             /* col[row] is element (row, j) of B */
             col = b + j * stride;
 /*           Solve L*x = b. */
             if (one_rhs) {
                 for (row = 1; row < order; ++row) {
                     piv = ipiv[row];
                     held = col[row + 1 - piv + row] - dl[row] * col[piv];
                     col[row] = col[piv];
                     col[row + 1] = held;
                 }
             } else {
                 for (row = 1; row < order; ++row) {
                     if (ipiv[row] == row) {
                         col[row + 1] -= dl[row] * col[row];
                     } else {
                         held = col[row];
                         col[row] = col[row + 1];
                         col[row + 1] = held - dl[row] * col[row];
                     }
                 }
             }
 /*           Solve U*x = b. */
             col[order] /= d__[order];
             if (order > 1) {
                 col[order - 1] = (col[order - 1] - du[order - 1] * col[order]) / d__[order - 1];
             }
             for (row = order - 2; row >= 1; --row) {
                 col[row] = (col[row] - du[row] * col[row + 1] - du2[row] * col[row + 2]) / d__[row];
             }
         }
     } else {
 /*        Solve A**T * X = B. */
         for (j = 1; j <= last; ++j) {
             /* col[row] is element (row, j) of B */
             col = b + j * stride;
 /*           Solve U**T*x = b. */
             col[1] /= d__[1];
             if (order > 1) {
                 col[2] = (col[2] - du[1] * col[1]) / d__[2];
             }
             for (row = 3; row <= order; ++row) {
                 col[row] = (col[row] - du[row - 1] * col[row - 1] - du2[row - 2] * col[row - 2]) / d__[row];
             }
 /*           Solve L**T*x = b. */
             if (one_rhs) {
                 for (row = order - 1; row >= 1; --row) {
                     piv = ipiv[row];
                     held = col[row] - dl[row] * col[row + 1];
                     col[row] = col[piv];
                     col[piv] = held;
                 }
             } else {
                 for (row = order - 1; row >= 1; --row) {
                     if (ipiv[row] == row) {
                         col[row] -= dl[row] * col[row + 1];
                     } else {
                         held = col[row + 1];
                         col[row + 1] = col[row] - dl[row] * held;
                         col[row] = held;
                     }
                 }
             }
         }
     }
 /*     End of SGTTS2 */
     return 0;
 } /* sgtts2_ */

--- a/lapack-netlib/SRC/shgeqz.c
+++ b/lapack-netlib/SRC/shgeqz.c
--- a/lapack-netlib/SRC/shsein.c
+++ b/lapack-netlib/SRC/shsein.c
@@ -0,0 +1,968 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Assemble a C99 complex float from the r and i fields of the struct z points to. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 /* Double-precision counterpart of Cf. */
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* Cast a struct pointer to a C99 complex pointer; the pCf macro dereferences the
    result so the struct can be assigned a complex value.  Assumes the two-field
    struct and the C99 complex type share the same layout. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 /* Double-precision counterpart of _pCf (used by the pCd macro). */
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* Complex ** complex power: R, A and B are all doublecomplex pointers, so the
    exponent has to go through Cd() just like the base before reaching cpow(). */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Computes x**n for an integer exponent by square-and-multiply.
    x: base.  n: exponent; when negative, the reciprocal of x is raised to |n|.
    Returns 1 when n is 0. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		if (!(bits >>= 1)) break;
 		x *= x;
 	}
 	return result;
 }
 /* Computes x**n for an integer exponent by square-and-multiply.
    x: base.  n: exponent; when negative, the reciprocal of x is raised to |n|.
    Returns 1 when n is 0. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		if (!(bits >>= 1)) break;
 		x *= x;
 	}
 	return result;
 }
 /* Computes the complex value x**n for an integer exponent by square-and-multiply.
    x: base.  n: exponent; when negative, the reciprocal of x is raised to |n|.
    Returns 1 when n is 0. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		if (!(bits >>= 1)) break;
 		x *= x;
 	}
 	return result;
 }
 /* Computes the complex value x**n for an integer exponent by square-and-multiply.
    x: base.  n: exponent; when negative, the reciprocal of x is raised to |n|.
    Returns 1 when n is 0. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;
 	if (n == 0) return result;
 	if (n < 0) {
 		/* Take |n| in unsigned arithmetic: negating the most negative
 		   integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int)n;
 		x = 1/x;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		if (!(bits >>= 1)) break;
 		x *= x;
 	}
 	return result;
 }
 /* Integer power x**n with an integer result.
    n == 0 yields 1.  For n < 0 the result is 1 when x is 1, (-1)**|n| when x is -1
    and 0 for every other nonzero x.  x == 0 with n < 0 still evaluates 1/x, as the
    code always did (NOTE(review): presumably deliberate, so the invalid operation
    is not silently turned into a value -- confirm before changing). */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int bits;
 	if (n == 0) return 1;
 	if (n < 0) {
 		if (x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* Only x == -1 gets here.  Take |n| in unsigned arithmetic:
 		   negating the most negative integer as a signed value would overflow. */
 		bits = 0UL - (unsigned long int)n;
 	} else {
 		bits = (unsigned long int)n;
 	}
 	for (;;) {
 		if (bits & 01) result *= x;
 		if (!(bits >>= 1)) break;
 		x *= x;
 	}
 	return result;
 }
 /* Locates the largest entry among w[s-1] .. w[e-1] (s and e are 1-based bounds).
    Returns its position counted from s, starting at 1; on ties the earliest entry
    wins because only a strictly larger value replaces the current one.
    The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double top = w[s-1];
 	integer k;
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Locates the largest entry among w[s-1] .. w[e-1] (s and e are 1-based bounds).
    Returns its position counted from s, starting at 1; on ties the earliest entry
    wins because only a strictly larger value replaces the current one.
    The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float top = w[s-1];
 	integer k;
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: stores sum_k conj(x_k) * y_k in *z.
    n_: number of terms (a value <= 0 stores zero).
    x, incx_ / y, incy_: vectors and their strides.  A negative stride walks the
    vector backwards starting from its far end, as the reference BLAS does. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	/* Start offsets: 0 for forward strides, (1-n)*inc for backward ones so that
 	   no element in front of the array is touched. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	integer k;
 	_Complex float acc = 0.0;
 	for (k = 0; k < n; ++k, ix += incx, iy += incy) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product: stores sum_k conj(x_k) * y_k in *z.
    n_: number of terms (a value <= 0 stores zero).
    x, incx_ / y, incy_: vectors and their strides.  A negative stride walks the
    vector backwards starting from its far end, as the reference BLAS does. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	/* Start offsets: 0 for forward strides, (1-n)*inc for backward ones so that
 	   no element in front of the array is touched. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	integer k;
 	_Complex double acc = 0.0;
 	for (k = 0; k < n; ++k, ix += incx, iy += incy) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 	}
 	pCd(z) = acc;
 }
 /* Unconjugated dot product: stores sum_k x_k * y_k in *z.
    n_: number of terms (a value <= 0 stores zero).
    x, incx_ / y, incy_: vectors and their strides.  A negative stride walks the
    vector backwards starting from its far end, as the reference BLAS does. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	/* Start offsets: 0 for forward strides, (1-n)*inc for backward ones so that
 	   no element in front of the array is touched. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	integer k;
 	_Complex float acc = 0.0;
 	for (k = 0; k < n; ++k, ix += incx, iy += incy) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product: stores sum_k x_k * y_k in *z.
    n_: number of terms (a value <= 0 stores zero).
    x, incx_ / y, incy_: vectors and their strides.  A negative stride walks the
    vector backwards starting from its far end, as the reference BLAS does. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	const integer n = *n_, incx = *incx_, incy = *incy_;
 	/* Start offsets: 0 for forward strides, (1-n)*inc for backward ones so that
 	   no element in front of the array is touched. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	integer k;
 	_Complex double acc = 0.0;
 	for (k = 0; k < n; ++k, ix += incx, iy += incy) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* File-scope logical objects initialised to FALSE_ and TRUE_.
    NOTE(review): presumably they exist so that their addresses can be handed to
    by-reference callees; the uses lie outside this excerpt -- confirm. */
 static logical c_false = FALSE_;
 static logical c_true = TRUE_;

 /* > \brief \b SHSEIN */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SHSEIN + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/shsein.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/shsein.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/shsein.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, WR, WI, */
 /*                          VL, LDVL, VR, LDVR, MM, M, WORK, IFAILL, */
 /*                          IFAILR, INFO ) */

 /*       CHARACTER          EIGSRC, INITV, SIDE */
 /*       INTEGER            INFO, LDH, LDVL, LDVR, M, MM, N */
 /*       LOGICAL            SELECT( * ) */
 /*       INTEGER            IFAILL( * ), IFAILR( * ) */
 /*       REAL               H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ), */
 /*      $                   WI( * ), WORK( * ), WR( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SHSEIN uses inverse iteration to find specified right and/or left */
 /* > eigenvectors of a real upper Hessenberg matrix H. */
 /* > */
 /* > The right eigenvector x and the left eigenvector y of the matrix H */
 /* > corresponding to an eigenvalue w are defined by: */
 /* > */
 /* >              H * x = w * x,     y**h * H = w * y**h */
 /* > */
 /* > where y**h denotes the conjugate transpose of the vector y. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] SIDE */
 /* > \verbatim */
 /* >          SIDE is CHARACTER*1 */
 /* >          = 'R': compute right eigenvectors only; */
 /* >          = 'L': compute left eigenvectors only; */
 /* >          = 'B': compute both right and left eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] EIGSRC */
 /* > \verbatim */
 /* >          EIGSRC is CHARACTER*1 */
 /* >          Specifies the source of eigenvalues supplied in (WR,WI): */
 /* >          = 'Q': the eigenvalues were found using SHSEQR; thus, if */
 /* >                 H has zero subdiagonal elements, and so is */
 /* >                 block-triangular, then the j-th eigenvalue can be */
 /* >                 assumed to be an eigenvalue of the block containing */
 /* >                 the j-th row/column.  This property allows SHSEIN to */
 /* >                 perform inverse iteration on just one diagonal block. */
 /* >          = 'N': no assumptions are made on the correspondence */
 /* >                 between eigenvalues and diagonal blocks.  In this */
 /* >                 case, SHSEIN must always perform inverse iteration */
 /* >                 using the whole matrix H. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] INITV */
 /* > \verbatim */
 /* >          INITV is CHARACTER*1 */
 /* >          = 'N': no initial vectors are supplied; */
 /* >          = 'U': user-supplied initial vectors are stored in the arrays */
 /* >                 VL and/or VR. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] SELECT */
 /* > \verbatim */
 /* >          SELECT is LOGICAL array, dimension (N) */
 /* >          Specifies the eigenvectors to be computed. To select the */
 /* >          real eigenvector corresponding to a real eigenvalue WR(j), */
 /* >          SELECT(j) must be set to .TRUE.. To select the complex */
 /* >          eigenvector corresponding to a complex eigenvalue */
 /* >          (WR(j),WI(j)), with complex conjugate (WR(j+1),WI(j+1)), */
 /* >          either SELECT(j) or SELECT(j+1) or both must be set to */
 /* >          .TRUE.; then on exit SELECT(j) is .TRUE. and SELECT(j+1) is */
 /* >          .FALSE.. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >          The order of the matrix H.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] H */
 /* > \verbatim */
 /* >          H is REAL array, dimension (LDH,N) */
 /* >          The upper Hessenberg matrix H. */
 /* >          If a NaN is detected in H, the routine will return with INFO=-6. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDH */
 /* > \verbatim */
 /* >          LDH is INTEGER */
 /* >          The leading dimension of the array H.  LDH >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] WR */
 /* > \verbatim */
 /* >          WR is REAL array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] WI */
 /* > \verbatim */
 /* >          WI is REAL array, dimension (N) */
 /* > */
 /* >          On entry, the real and imaginary parts of the eigenvalues of */
 /* >          H; a complex conjugate pair of eigenvalues must be stored in */
 /* >          consecutive elements of WR and WI. */
 /* >          On exit, WR may have been altered since close eigenvalues */
 /* >          are perturbed slightly in searching for independent */
 /* >          eigenvectors. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] VL */
 /* > \verbatim */
 /* >          VL is REAL array, dimension (LDVL,MM) */
 /* >          On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must */
 /* >          contain starting vectors for the inverse iteration for the */
 /* >          left eigenvectors; the starting vector for each eigenvector */
 /* >          must be in the same column(s) in which the eigenvector will */
 /* >          be stored. */
 /* >          On exit, if SIDE = 'L' or 'B', the left eigenvectors */
 /* >          specified by SELECT will be stored consecutively in the */
 /* >          columns of VL, in the same order as their eigenvalues. A */
 /* >          complex eigenvector corresponding to a complex eigenvalue is */
 /* >          stored in two consecutive columns, the first holding the real */
 /* >          part and the second the imaginary part. */
 /* >          If SIDE = 'R', VL is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDVL */
 /* > \verbatim */
 /* >          LDVL is INTEGER */
 /* >          The leading dimension of the array VL. */
 /* >          LDVL >= f2cmax(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] VR */
 /* > \verbatim */
 /* >          VR is REAL array, dimension (LDVR,MM) */
 /* >          On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must */
 /* >          contain starting vectors for the inverse iteration for the */
 /* >          right eigenvectors; the starting vector for each eigenvector */
 /* >          must be in the same column(s) in which the eigenvector will */
 /* >          be stored. */
 /* >          On exit, if SIDE = 'R' or 'B', the right eigenvectors */
 /* >          specified by SELECT will be stored consecutively in the */
 /* >          columns of VR, in the same order as their eigenvalues. A */
 /* >          complex eigenvector corresponding to a complex eigenvalue is */
 /* >          stored in two consecutive columns, the first holding the real */
 /* >          part and the second the imaginary part. */
 /* >          If SIDE = 'L', VR is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDVR */
 /* > \verbatim */
 /* >          LDVR is INTEGER */
 /* >          The leading dimension of the array VR. */
 /* >          LDVR >= f2cmax(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] MM */
 /* > \verbatim */
 /* >          MM is INTEGER */
 /* >          The number of columns in the arrays VL and/or VR. MM >= M. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >          The number of columns in the arrays VL and/or VR required to */
 /* >          store the eigenvectors; each selected real eigenvector */
 /* >          occupies one column and each selected complex eigenvector */
 /* >          occupies two columns. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension ((N+2)*N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IFAILL */
 /* > \verbatim */
 /* >          IFAILL is INTEGER array, dimension (MM) */
 /* >          If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left */
 /* >          eigenvector in the i-th column of VL (corresponding to the */
 /* >          eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the */
 /* >          eigenvector converged satisfactorily. If the i-th and (i+1)th */
 /* >          columns of VL hold a complex eigenvector, then IFAILL(i) and */
 /* >          IFAILL(i+1) are set to the same value. */
 /* >          If SIDE = 'R', IFAILL is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IFAILR */
 /* > \verbatim */
 /* >          IFAILR is INTEGER array, dimension (MM) */
 /* >          If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right */
 /* >          eigenvector in the i-th column of VR (corresponding to the */
 /* >          eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the */
 /* >          eigenvector converged satisfactorily. If the i-th and (i+1)th */
 /* >          columns of VR hold a complex eigenvector, then IFAILR(i) and */
 /* >          IFAILR(i+1) are set to the same value. */
 /* >          If SIDE = 'L', IFAILR is not referenced. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >          = 0:  successful exit */
 /* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
 /* >          > 0:  if INFO = i, i is the number of eigenvectors which */
 /* >                failed to converge; see IFAILL and IFAILR for further */
 /* >                details. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realOTHERcomputational */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >  Each eigenvector is normalized so that the element of largest */
 /* >  magnitude has magnitude 1; here the magnitude of a complex number */
 /* >  (x,y) is taken to be |x|+|y|. */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
 /* shsein_: C translation of LAPACK SHSEIN (see the documentation block */
 /* above). Every argument is passed by pointer. The routine counts the */
 /* selected eigenvectors into *m, validates the arguments, and then calls */
 /* slaein_ once per selected eigenvalue for the left and/or right vector. */
 /* It always returns 0; the status is reported through *info. */
 /* Subroutine */ int shsein_(char *side, char *eigsrc, char *initv, logical *
 	select, integer *n, real *h__, integer *ldh, real *wr, real *wi, real 
 	*vl, integer *ldvl, real *vr, integer *ldvr, integer *mm, integer *m, 
 	real *work, integer *ifaill, integer *ifailr, integer *info)
 {
    /* System generated locals */
    integer h_dim1, h_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, 
 	    i__2;
    real r__1, r__2;

    /* Local variables */
    logical pair;
    real unfl;
    integer i__, k;
    extern logical lsame_(char *, char *);
    integer iinfo;
    logical leftv, bothv;
    real hnorm;
    integer kl, kr;
    extern real slamch_(char *);
    extern /* Subroutine */ int slaein_(logical *, logical *, integer *, real 
 	    *, integer *, real *, real *, real *, real *, real *, integer *, 
 	    real *, real *, real *, real *, integer *), xerbla_(char *, 
 	    integer *, ftnlen);
    real bignum;
    extern real slanhs_(char *, integer *, real *, integer *, real *);
    extern logical sisnan_(real *);
    logical noinit;
    integer ldwork;
    logical rightv, fromqr;
    real smlnum;
    integer kln, ksi;
    real wki;
    integer ksr;
    real ulp, wkr, eps3;


 /*  -- LAPACK computational routine (version 3.7.0) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     December 2016 */


 /*  ===================================================================== */


 /*     Decode and test the input parameters. */

    /* Parameter adjustments */
 /*     Each pointer is shifted so the arrays can be indexed from 1 below; */
 /*     the 2-D arrays are then addressed as a[row + col * a_dim1]. */
    --select;
    h_dim1 = *ldh;
    h_offset = 1 + h_dim1 * 1;
    h__ -= h_offset;
    --wr;
    --wi;
    vl_dim1 = *ldvl;
    vl_offset = 1 + vl_dim1 * 1;
    vl -= vl_offset;
    vr_dim1 = *ldvr;
    vr_offset = 1 + vr_dim1 * 1;
    vr -= vr_offset;
    --work;
    --ifaill;
    --ifailr;

    /* Function Body */
 /*     SIDE = 'B' switches on both the left and the right computation. */
    bothv = lsame_(side, "B");
    rightv = lsame_(side, "R") || bothv;
    leftv = lsame_(side, "L") || bothv;

    fromqr = lsame_(eigsrc, "Q");

    noinit = lsame_(initv, "N");

 /*     Set M to the number of columns required to store the selected */
 /*     eigenvectors, and standardize the array SELECT. */

 /*     PAIR is set on the first member of a complex pair (WI(k) != 0) so */
 /*     that the next index is treated as its partner: its SELECT entry is */
 /*     cleared and it is not counted again. */
    *m = 0;
    pair = FALSE_;
    i__1 = *n;
    for (k = 1; k <= i__1; ++k) {
 	if (pair) {
 	    pair = FALSE_;
 	    select[k] = FALSE_;
 	} else {
 	    if (wi[k] == 0.f) {
 		if (select[k]) {
 		    ++(*m);
 		}
 	    } else {
 		pair = TRUE_;
 		if (select[k] || select[k + 1]) {
 		    select[k] = TRUE_;
 		    *m += 2;
 		}
 	    }
 	}
 /* L10: */
    }

 /*     Argument checks: *info is set to minus the position of the first */
 /*     offending argument. */
    *info = 0;
    if (! rightv && ! leftv) {
 	*info = -1;
    } else if (! fromqr && ! lsame_(eigsrc, "N")) {
 	*info = -2;
    } else if (! noinit && ! lsame_(initv, "U")) {
 	*info = -3;
    } else if (*n < 0) {
 	*info = -5;
    } else if (*ldh < f2cmax(1,*n)) {
 	*info = -7;
    } else if (*ldvl < 1 || leftv && *ldvl < *n) {
 	*info = -11;
    } else if (*ldvr < 1 || rightv && *ldvr < *n) {
 	*info = -13;
    } else if (*mm < *m) {
 	*info = -14;
    }
    if (*info != 0) {
 /*        xerbla_ receives the positive argument position. */
 	i__1 = -(*info);
 	xerbla_("SHSEIN", &i__1, (ftnlen)6);
 	return 0;
    }

 /*     Quick return if possible. */

    if (*n == 0) {
 	return 0;
    }

 /*     Set machine-dependent constants. */

    unfl = slamch_("Safe minimum");
    ulp = slamch_("Precision");
    smlnum = unfl * (*n / ulp);
    bignum = (1.f - ulp) / smlnum;

 /*     LDWORK is handed to slaein_ together with WORK(1). */
    ldwork = *n + 1;

 /*     KLN remembers the KL for which HNORM/EPS3 were last computed; it */
 /*     starts at 0 (never equal to KL >= 1) so the first selected */
 /*     eigenvalue always triggers the norm computation. When EIGSRC is */
 /*     not 'Q', KL stays 1 and KR stays N. */
    kl = 1;
    kln = 0;
    if (fromqr) {
 	kr = 0;
    } else {
 	kr = *n;
    }
 /*     KSR is the next free output column in VL/VR. */
    ksr = 1;

    i__1 = *n;
    for (k = 1; k <= i__1; ++k) {
 	if (select[k]) {

 /*           Compute eigenvector(s) corresponding to W(K). */

 	    if (fromqr) {

 /*              If affiliation of eigenvalues is known, check whether */
 /*              the matrix splits. */

 /*              Determine KL and KR such that 1 <= KL <= K <= KR <= N */
 /*              and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or */
 /*              KR = N). */

 /*              Then inverse iteration can be performed with the */
 /*              submatrix H(KL:N,KL:N) for a left eigenvector, and with */
 /*              the submatrix H(1:KR,1:KR) for a right eigenvector. */

 /*              Scan upwards from row K for a zero subdiagonal entry. */
 /*              Whether the loop exits through the goto or by running */
 /*              out, I holds the new KL at label L30. */
 		i__2 = kl + 1;
 		for (i__ = k; i__ >= i__2; --i__) {
 		    if (h__[i__ + (i__ - 1) * h_dim1] == 0.f) {
 			goto L30;
 		    }
 /* L20: */
 		}
 L30:
 		kl = i__;
 /*              KR is only searched again once K has moved past it. */
 /*              The scan runs downwards; I is N if no zero is found. */
 		if (k > kr) {
 		    i__2 = *n - 1;
 		    for (i__ = k; i__ <= i__2; ++i__) {
 			if (h__[i__ + 1 + i__ * h_dim1] == 0.f) {
 			    goto L50;
 			}
 /* L40: */
 		    }
 L50:
 		    kr = i__;
 		}
 	    }

 	    if (kl != kln) {
 		kln = kl;

 /*              Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it */
 /*              has not been computed before. */

 		i__2 = kr - kl + 1;
 		hnorm = slanhs_("I", &i__2, &h__[kl + kl * h_dim1], ldh, &
 			work[1]);
 /*              A NaN norm is reported as an illegal H (argument 6). */
 /*              This exit does not go through xerbla_. */
 		if (sisnan_(&hnorm)) {
 		    *info = -6;
 		    return 0;
 		} else if (hnorm > 0.f) {
 		    eps3 = hnorm * ulp;
 		} else {
 		    eps3 = smlnum;
 		}
 	    }

 /*           Perturb eigenvalue if it is close to any previous */
 /*           selected eigenvalues affiliated to the submatrix */
 /*           H(KL:KR,KL:KR). Close roots are modified by EPS3. */

 /*           Closeness is |re diff| + |im diff| < EPS3. After each */
 /*           perturbation of the real part the scan restarts at L60. */
 	    wkr = wr[k];
 	    wki = wi[k];
 L60:
 	    i__2 = kl;
 	    for (i__ = k - 1; i__ >= i__2; --i__) {
 		if (select[i__] && (r__1 = wr[i__] - wkr, abs(r__1)) + (r__2 =
 			 wi[i__] - wki, abs(r__2)) < eps3) {
 		    wkr += eps3;
 		    goto L60;
 		}
 /* L70: */
 	    }
 /*           The (possibly perturbed) real part is written back to WR. */
 	    wr[k] = wkr;

 /*           A complex eigenvalue uses two columns: KSR and KSI = KSR+1. */
 /*           A real one uses KSR only, so KSI aliases it. */
 	    pair = wki != 0.f;
 	    if (pair) {
 		ksi = ksr + 1;
 	    } else {
 		ksi = ksr;
 	    }
 	    if (leftv) {

 /*              Compute left eigenvector. */

 /*              slaein_ is given the trailing block starting at H(KL,KL), */
 /*              of order N-KL+1, and rows KL.. of columns KSR/KSI of VL. */
 /*              The second workspace pointer starts at WORK(N*N+N+1). */
 		i__2 = *n - kl + 1;
 		slaein_(&c_false, &noinit, &i__2, &h__[kl + kl * h_dim1], ldh,
 			 &wkr, &wki, &vl[kl + ksr * vl_dim1], &vl[kl + ksi * 
 			vl_dim1], &work[1], &ldwork, &work[*n * *n + *n + 1], 
 			&eps3, &smlnum, &bignum, &iinfo);
 /*              IINFO > 0 is treated as a failure: *info counts the */
 /*              affected columns and IFAILL records the eigenvalue index. */
 		if (iinfo > 0) {
 		    if (pair) {
 			*info += 2;
 		    } else {
 			++(*info);
 		    }
 		    ifaill[ksr] = k;
 		    ifaill[ksi] = k;
 		} else {
 		    ifaill[ksr] = 0;
 		    ifaill[ksi] = 0;
 		}
 /*              Rows 1..KL-1 of the output column(s) are set to zero. */
 		i__2 = kl - 1;
 		for (i__ = 1; i__ <= i__2; ++i__) {
 		    vl[i__ + ksr * vl_dim1] = 0.f;
 /* L80: */
 		}
 		if (pair) {
 		    i__2 = kl - 1;
 		    for (i__ = 1; i__ <= i__2; ++i__) {
 			vl[i__ + ksi * vl_dim1] = 0.f;
 /* L90: */
 		    }
 		}
 	    }
 	    if (rightv) {

 /*              Compute right eigenvector. */

 /*              Here slaein_ is given the leading block of order KR, */
 /*              starting at H(1,1), and the full columns KSR/KSI of VR. */
 		slaein_(&c_true, &noinit, &kr, &h__[h_offset], ldh, &wkr, &
 			wki, &vr[ksr * vr_dim1 + 1], &vr[ksi * vr_dim1 + 1], &
 			work[1], &ldwork, &work[*n * *n + *n + 1], &eps3, &
 			smlnum, &bignum, &iinfo);
 		if (iinfo > 0) {
 		    if (pair) {
 			*info += 2;
 		    } else {
 			++(*info);
 		    }
 		    ifailr[ksr] = k;
 		    ifailr[ksi] = k;
 		} else {
 		    ifailr[ksr] = 0;
 		    ifailr[ksi] = 0;
 		}
 /*              Rows KR+1..N of the output column(s) are set to zero. */
 		i__2 = *n;
 		for (i__ = kr + 1; i__ <= i__2; ++i__) {
 		    vr[i__ + ksr * vr_dim1] = 0.f;
 /* L100: */
 		}
 		if (pair) {
 		    i__2 = *n;
 		    for (i__ = kr + 1; i__ <= i__2; ++i__) {
 			vr[i__ + ksi * vr_dim1] = 0.f;
 /* L110: */
 		    }
 		}
 	    }

 /*           Advance to the next free output column(s). */
 	    if (pair) {
 		ksr += 2;
 	    } else {
 		++ksr;
 	    }
 	}
 /* L120: */
    }

    return 0;

 /*     End of SHSEIN */

 } /* shsein_ */

--- a/lapack-netlib/SRC/shseqr.c
+++ b/lapack-netlib/SRC/shseqr.c
@@ -0,0 +1,941 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* C spellings of the Fortran scalar types used by the translated code. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex values are stored as (real, imaginary) structs.  The name */
 /* `complex` is usable as a typedef here only because the <complex.h> macro */
 /* of that name was #undef'ed above. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd build a C99 complex value from a struct by value. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd reinterpret the struct pointer as a pointer to a C99 complex, */
 /* and pCf/pCd dereference it so the macros can be used as assignment */
 /* targets, e.g. pCf(z) = value. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Fortran LOGICAL and the small integer/logical variants. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Values stored in `logical` variables. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */
 /* Descriptor structs from the standard f2c header, one per kind of Fortran */
 /* I/O statement as labelled below.  NOTE(review): none of them is used by */
 /* the routines visible in this part of the file; they appear to be kept */
 /* only because the whole header is pasted in. */

 /* flag and ftnint are plain ints used for the descriptor fields; ftnlen is */
 /* the length type (translated calls such as xerbla_ take an ftnlen). */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Most string-valued fields are a char* followed by its length field. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 /* Alias for void. */
 #define VOID void

 /* One member per scalar type declared above. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Describes one variable of a Namelist: name, address, dims and a type */
 /* code.  NOTE(review): the encoding of `dims` and `type` is defined by */
 /* libf2c and is not visible here. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars Vardesc pointers. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Type-generic arithmetic helpers.  All of them are plain macros, so an */
 /* argument may be evaluated more than once: do not pass expressions with */
 /* side effects.  `abs` is defined after <stdlib.h> has been included, so */
 /* from here on the macro (which also works on floating-point operands) is */
 /* used instead of the library function. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit helpers: test, clear or set bit number b of a. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro replacements for libf2c runtime routines.  Conventions visible in */
 /* the definitions below: */
 /*   - scalar arguments arrive as pointers and are dereferenced here; */
 /*   - complex arguments arrive as struct pointers and go through */
 /*     Cf()/Cd(); a complex result is stored through pCf()/pCd() into the */
 /*     first argument; */
 /*   - the brace-wrapped forms are statements, not expressions. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 /* NOTE(review): c_cos calls the double-precision ccos although the other */
 /* single-precision wrappers use the f-suffixed functions. */
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 /* Complex conjugate; r_cnjg goes through the double-precision conj. */
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* Positive difference: a - b when a > b, otherwise 0. */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* Truncation toward zero, built from floor(). */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* Base-10 logarithm as log(x) times a constant (presumably log10(e)). */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* Round to nearest, halves away from zero; u_nint takes a value, */
 /* d_nint a pointer. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* |a| carrying the sign of b (b >= 0 counts as positive). */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 /* i_len ignores the string pointer and yields the length argument. */
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Power helpers.  Arguments arrive as pointers, as in the libf2c routines */
 /* these macros replace: */
 /*   pow_dd          real base, real exponent, via pow(); */
 /*   pow_si/ri/di    real base, integer exponent, via spow_ui/dpow_ui; */
 /*   pow_zi/ci       complex base, integer exponent; the result is stored */
 /*                   through the first argument; */
 /*   pow_zz          complex base and complex exponent; the result is */
 /*                   stored through the first argument. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* B points to a doublecomplex struct, which cannot be handed to cpow() */
 /* directly; convert it with Cd() exactly like A. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 /* s_cat: concatenate *(np) source strings rpp[i] (lengths rnp[i]) into lpp, */
 /* which holds llp characters; the copy is truncated to the room left and */
 /* the remainder of lpp is filled with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the shorter of the two lengths c and d. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copy at most min(C,D) characters from B to A, stopping early at */
 /* a NUL in B; A is neither blank-padded nor NUL-terminated here. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* sig_die and s_stop ignore their arguments and terminate the process */
 /* with exit status 1 and 0 respectively. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Version string in libf2c's format; not referenced by the macros here. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control statements: myexit_ is break, mycycle is continue. */
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): the next three expand to a braced block with no */
 /* terminating semicolon inside, so they cannot be used as expressions and */
 /* would not compile as statements either; they appear to be unused. */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning `logical`.  The parameter list is */
 /* left unspecified: `(...)` when compiled as C++, an empty `()` */
 /* declarator in C. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* x raised to the integer power n, single precision, computed by binary */
 /* (square-and-multiply) exponentiation.  n == 0 gives 1; a negative n is */
 /* handled by inverting x and using |n|. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;

 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	while (1) {
 		/* multiply in the current square when the low exponent bit is set */
 		if (bits & 01) {
 			result *= x;
 		}
 		bits >>= 1;
 		if (bits == 0) {
 			break;
 		}
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /* x raised to the integer power n, double precision, computed by binary */
 /* (square-and-multiply) exponentiation.  n == 0 gives 1; a negative n is */
 /* handled by inverting x and using |n|. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;

 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	while (1) {
 		/* multiply in the current square when the low exponent bit is set */
 		if (bits & 01) {
 			result *= x;
 		}
 		bits >>= 1;
 		if (bits == 0) {
 			break;
 		}
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /* Single-precision complex x raised to the integer power n, computed by */
 /* binary (square-and-multiply) exponentiation.  n == 0 gives 1; a negative */
 /* n is handled by inverting x and using |n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;

 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	while (1) {
 		/* multiply in the current square when the low exponent bit is set */
 		if (bits & 01) {
 			result *= x;
 		}
 		bits >>= 1;
 		if (bits == 0) {
 			break;
 		}
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /* Double-precision complex x raised to the integer power n, computed by */
 /* binary (square-and-multiply) exponentiation.  n == 0 gives 1; a negative */
 /* n is handled by inverting x and using |n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;

 	if (n == 0) {
 		return result;
 	}
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	while (1) {
 		/* multiply in the current square when the low exponent bit is set */
 		if (bits & 01) {
 			result *= x;
 		}
 		bits >>= 1;
 		if (bits == 0) {
 			break;
 		}
 		/* square only while exponent bits remain */
 		x *= x;
 	}
 	return result;
 }
 /* Integer power x**n with integer (truncating) results for negative n: */
 /*   n == 0 or x == 1      -> 1 */
 /*   n < 0, x == -1        -> (-1)**|n| */
 /*   n < 0, any other x    -> 0, except that x == 0 evaluates 1/x, i.e. an */
 /*                            integer division by zero, as in the original */
 /*   n > 0                 -> binary (square-and-multiply) exponentiation */
 static integer pow_ii(integer x, integer n) {
 	integer result;
 	unsigned long int bits;

 	if (n <= 0) {
 		if (n == 0 || x == 1) {
 			return 1;
 		}
 		if (x != -1) {
 			return x == 0 ? 1/x : 0;
 		}
 		/* x == -1 with a negative exponent: same as the positive power */
 		n = -n;
 	}

 	result = 1;
 	bits = n;
 	while (1) {
 		if (bits & 01) {
 			result *= x;
 		}
 		bits >>= 1;
 		if (bits == 0) {
 			break;
 		}
 		x *= x;
 	}
 	return result;
 }
 /* Position of the largest element among w(s..e), using 1-based indices. */
 /* The result is relative to s, so 1 means w(s).  On ties the first */
 /* occurrence wins.  The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer idx;
 	double top = w[s-1];

 	for (idx = s + 1; idx <= e; ++idx) {
 		if (w[idx-1] > top) {
 			best = idx;
 			top = w[idx-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the largest element among w(s..e), using 1-based indices. */
 /* The result is relative to s, so 1 means w(s).  On ties the first */
 /* occurrence wins.  The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer idx;
 	float top = w[s-1];

 	for (idx = s + 1; idx <= e; ++idx) {
 		if (w[idx-1] > top) {
 			best = idx;
 			top = w[idx-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product of two single-precision complex vectors: */
 /*     *z = sum over i of conj(x(i)) * y(i),  i = 1..n */
 /* n_, incx_ and incy_ are passed by pointer; incx/incy are the element */
 /* strides of x and y.  n <= 0 stores 0 in *z.  A negative stride walks the */
 /* vector backwards starting from element (1-n)*inc, as the reference BLAS */
 /* does, so the pointer is always the lowest address of the vector. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous vectors */
 		for (i=0;i<n;i++) {
 			acc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		/* start at the far end of any vector with a negative stride */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product of two double-precision complex vectors: */
 /*     *z = sum over i of conj(x(i)) * y(i),  i = 1..n */
 /* n_, incx_ and incy_ are passed by pointer; incx/incy are the element */
 /* strides of x and y.  n <= 0 stores 0 in *z.  A negative stride walks the */
 /* vector backwards starting from element (1-n)*inc, as the reference BLAS */
 /* does, so the pointer is always the lowest address of the vector. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous vectors */
 		for (i=0;i<n;i++) {
 			acc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		/* start at the far end of any vector with a negative stride */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = acc;
 }
 /* Unconjugated dot product of two single-precision complex vectors: */
 /*     *z = sum over i of x(i) * y(i),  i = 1..n */
 /* n_, incx_ and incy_ are passed by pointer; incx/incy are the element */
 /* strides of x and y.  n <= 0 stores 0 in *z.  A negative stride walks the */
 /* vector backwards starting from element (1-n)*inc, as the reference BLAS */
 /* does, so the pointer is always the lowest address of the vector. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous vectors */
 		for (i=0;i<n;i++) {
 			acc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		/* start at the far end of any vector with a negative stride */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			acc += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product of two double-precision complex vectors: */
 /*     *z = sum over i of x(i) * y(i),  i = 1..n */
 /* n_, incx_ and incy_ are passed by pointer; incx/incy are the element */
 /* strides of x and y.  n <= 0 stores 0 in *z.  A negative stride walks the */
 /* vector backwards starting from element (1-n)*inc, as the reference BLAS */
 /* does, so the pointer is always the lowest address of the vector. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double acc = 0.0;
 	if (incx == 1 && incy == 1) {
 		/* contiguous vectors */
 		for (i=0;i<n;i++) {
 			acc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		/* start at the far end of any vector with a negative stride */
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) {
 			acc += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* Literal constants given static storage, presumably so that the */
 /* translated routine can pass them by address; their uses lie further */
 /* down the file -- TODO confirm against the shseqr_ body. */
 static real c_b11 = 0.f;
 static real c_b12 = 1.f;
 static integer c__12 = 12;
 static integer c__2 = 2;
 static integer c__49 = 49;

 /* > \brief \b SHSEQR */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SHSEQR + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/shseqr.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/shseqr.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/shseqr.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, WR, WI, Z, */
 /*                          LDZ, WORK, LWORK, INFO ) */

 /*       INTEGER            IHI, ILO, INFO, LDH, LDZ, LWORK, N */
 /*       CHARACTER          COMPZ, JOB */
 /*       REAL               H( LDH, * ), WI( * ), WORK( * ), WR( * ), */
 /*      $                   Z( LDZ, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* >    SHSEQR computes the eigenvalues of a Hessenberg matrix H */
 /* >    and, optionally, the matrices T and Z from the Schur decomposition */
 /* >    H = Z T Z**T, where T is an upper quasi-triangular matrix (the */
 /* >    Schur form), and Z is the orthogonal matrix of Schur vectors. */
 /* > */
 /* >    Optionally Z may be postmultiplied into an input orthogonal */
 /* >    matrix Q so that this routine can give the Schur factorization */
 /* >    of a matrix A which has been reduced to the Hessenberg form H */
 /* >    by the orthogonal matrix Q:  A = Q*H*Q**T = (QZ)*T*(QZ)**T. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] JOB */
 /* > \verbatim */
 /* >          JOB is CHARACTER*1 */
 /* >           = 'E':  compute eigenvalues only; */
 /* >           = 'S':  compute eigenvalues and the Schur form T. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] COMPZ */
 /* > \verbatim */
 /* >          COMPZ is CHARACTER*1 */
 /* >           = 'N':  no Schur vectors are computed; */
 /* >           = 'I':  Z is initialized to the unit matrix and the matrix Z */
 /* >                   of Schur vectors of H is returned; */
 /* >           = 'V':  Z must contain an orthogonal matrix Q on entry, and */
 /* >                   the product Q*Z is returned. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >           The order of the matrix H.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ILO */
 /* > \verbatim */
 /* >          ILO is INTEGER */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IHI */
 /* > \verbatim */
 /* >          IHI is INTEGER */
 /* > */
 /* >           It is assumed that H is already upper triangular in rows */
 /* >           and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */
 /* >           set by a previous call to SGEBAL, and then passed to SGEHRD */
 /* >           when the matrix output by SGEBAL is reduced to Hessenberg */
 /* >           form. Otherwise ILO and IHI should be set to 1 and N */
 /* >           respectively.  If N > 0, then 1 <= ILO <= IHI <= N. */
 /* >           If N = 0, then ILO = 1 and IHI = 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] H */
 /* > \verbatim */
 /* >          H is REAL array, dimension (LDH,N) */
 /* >           On entry, the upper Hessenberg matrix H. */
 /* >           On exit, if INFO = 0 and JOB = 'S', then H contains the */
 /* >           upper quasi-triangular matrix T from the Schur decomposition */
 /* >           (the Schur form); 2-by-2 diagonal blocks (corresponding to */
 /* >           complex conjugate pairs of eigenvalues) are returned in */
 /* >           standard form, with H(i,i) = H(i+1,i+1) and */
 /* >           H(i+1,i)*H(i,i+1) < 0. If INFO = 0 and JOB = 'E', the */
 /* >           contents of H are unspecified on exit.  (The output value of */
 /* >           H when INFO > 0 is given under the description of INFO */
 /* >           below.) */
 /* > */
 /* >           Unlike earlier versions of SHSEQR, this subroutine may */
 /* >           explicitly set H(i,j) = 0 for i > j and j = 1, 2, ... ILO-1 */
 /* >           or j = IHI+1, IHI+2, ... N. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDH */
 /* > \verbatim */
 /* >          LDH is INTEGER */
 /* >           The leading dimension of the array H. LDH >= MAX(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WR */
 /* > \verbatim */
 /* >          WR is REAL array, dimension (N) */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WI */
 /* > \verbatim */
 /* >          WI is REAL array, dimension (N) */
 /* > */
 /* >           The real and imaginary parts, respectively, of the computed */
 /* >           eigenvalues. If two eigenvalues are computed as a complex */
 /* >           conjugate pair, they are stored in consecutive elements of */
 /* >           WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and */
 /* >           WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in */
 /* >           the same order as on the diagonal of the Schur form returned */
 /* >           in H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 */
 /* >           diagonal block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and */
 /* >           WI(i+1) = -WI(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] Z */
 /* > \verbatim */
 /* >          Z is REAL array, dimension (LDZ,N) */
 /* >           If COMPZ = 'N', Z is not referenced. */
 /* >           If COMPZ = 'I', on entry Z need not be set and on exit, */
 /* >           if INFO = 0, Z contains the orthogonal matrix Z of the Schur */
 /* >           vectors of H.  If COMPZ = 'V', on entry Z must contain an */
 /* >           N-by-N matrix Q, which is assumed to be equal to the unit */
 /* >           matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, */
 /* >           if INFO = 0, Z contains Q*Z. */
 /* >           Normally Q is the orthogonal matrix generated by SORGHR */
 /* >           after the call to SGEHRD which formed the Hessenberg matrix */
 /* >           H. (The output value of Z when INFO > 0 is given under */
 /* >           the description of INFO below.) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDZ */
 /* > \verbatim */
 /* >          LDZ is INTEGER */
 /* >           The leading dimension of the array Z.  if COMPZ = 'I' or */
 /* >           COMPZ = 'V', then LDZ >= MAX(1,N).  Otherwise, LDZ >= 1. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (LWORK) */
 /* >           On exit, if INFO = 0, WORK(1) returns an estimate of */
 /* >           the optimal value for LWORK. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LWORK */
 /* > \verbatim */
 /* >          LWORK is INTEGER */
 /* >           The dimension of the array WORK.  LWORK >= MAX(1,N) */
 /* >           is sufficient and delivers very good and sometimes */
 /* >           optimal performance.  However, LWORK as large as 11*N */
 /* >           may be required for optimal performance.  A workspace */
 /* >           query is recommended to determine the optimal workspace */
 /* >           size. */
 /* > */
 /* >           If LWORK = -1, then SHSEQR does a workspace query. */
 /* >           In this case, SHSEQR checks the input parameters and */
 /* >           estimates the optimal workspace size for the given */
 /* >           values of N, ILO and IHI.  The estimate is returned */
 /* >           in WORK(1).  No error message related to LWORK is */
 /* >           issued by XERBLA.  Neither H nor Z are accessed. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >             = 0:  successful exit */
 /* >             < 0:  if INFO = -i, the i-th argument had an illegal */
 /* >                    value */
 /* >             > 0:  if INFO = i, SHSEQR failed to compute all of */
 /* >                the eigenvalues.  Elements 1:ilo-1 and i+1:n of WR */
 /* >                and WI contain those eigenvalues which have been */
 /* >                successfully computed.  (Failures are rare.) */
 /* > */
 /* >                If INFO > 0 and JOB = 'E', then on exit, the */
 /* >                remaining unconverged eigenvalues are the eigen- */
 /* >                values of the upper Hessenberg matrix rows and */
 /* >                columns ILO through INFO of the final, output */
 /* >                value of H. */
 /* > */
 /* >                If INFO > 0 and JOB   = 'S', then on exit */
 /* > */
 /* >           (*)  (initial value of H)*U  = U*(final value of H) */
 /* > */
 /* >                where U is an orthogonal matrix.  The final */
 /* >                value of H is upper Hessenberg and quasi-triangular */
 /* >                in rows and columns INFO+1 through IHI. */
 /* > */
 /* >                If INFO > 0 and COMPZ = 'V', then on exit */
 /* > */
 /* >                  (final value of Z)  =  (initial value of Z)*U */
 /* > */
 /* >                where U is the orthogonal matrix in (*) (regard- */
 /* >                less of the value of JOB.) */
 /* > */
 /* >                If INFO > 0 and COMPZ = 'I', then on exit */
 /* >                      (final value of Z)  = U */
 /* >                where U is the orthogonal matrix in (*) (regard- */
 /* >                less of the value of JOB.) */
 /* > */
 /* >                If INFO > 0 and COMPZ = 'N', then Z is not */
 /* >                accessed. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realOTHERcomputational */

 /* > \par Contributors: */
 /*  ================== */
 /* > */
 /* >       Karen Braman and Ralph Byers, Department of Mathematics, */
 /* >       University of Kansas, USA */

 /* > \par Further Details: */
 /*  ===================== */
 /* > */
 /* > \verbatim */
 /* > */
 /* >             Default values supplied by */
 /* >             ILAENV(ISPEC,'SHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). */
 /* >             It is suggested that these defaults be adjusted in order */
 /* >             to attain best performance in each particular */
 /* >             computational environment. */
 /* > */
 /* >            ISPEC=12: The SLAHQR vs SLAQR0 crossover point. */
 /* >                      Default: 75. (Must be at least 11.) */
 /* > */
 /* >            ISPEC=13: Recommended deflation window size. */
 /* >                      This depends on ILO, IHI and NS.  NS is the */
 /* >                      number of simultaneous shifts returned */
 /* >                      by ILAENV(ISPEC=15).  (See ISPEC=15 below.) */
 /* >                      The default for (IHI-ILO+1) <= 500 is NS. */
 /* >                      The default for (IHI-ILO+1) >  500 is 3*NS/2. */
 /* > */
 /* >            ISPEC=14: Nibble crossover point. (See IPARMQ for */
 /* >                      details.)  Default: 14% of deflation window */
 /* >                      size. */
 /* > */
 /* >            ISPEC=15: Number of simultaneous shifts in a multishift */
 /* >                      QR iteration. */
 /* > */
 /* >                      If IHI-ILO+1 is ... */
 /* > */
 /* >                      greater than      ...but less    ... the */
 /* >                      or equal to ...      than        default is */
 /* > */
 /* >                           1               30          NS =   2(+) */
 /* >                          30               60          NS =   4(+) */
 /* >                          60              150          NS =  10(+) */
 /* >                         150              590          NS =  ** */
 /* >                         590             3000          NS =  64 */
 /* >                        3000             6000          NS = 128 */
 /* >                        6000             infinity      NS = 256 */
 /* > */
 /* >                  (+)  By default some or all matrices of this order */
 /* >                       are passed to the implicit double shift routine */
 /* >                       SLAHQR and this parameter is ignored.  See */
 /* >                       ISPEC=12 above and comments in IPARMQ for */
 /* >                       details. */
 /* > */
 /* >                 (**)  The asterisks (**) indicate an ad-hoc */
 /* >                       function of N increasing from 10 to 64. */
 /* > */
 /* >            ISPEC=16: Select structured matrix multiply. */
 /* >                      If the number of simultaneous shifts (specified */
 /* >                      by ISPEC=15) is less than 14, then the default */
 /* >                      for ISPEC=16 is 0.  Otherwise the default for */
 /* >                      ISPEC=16 is 2. */
 /* > \endverbatim */

 /* > \par References: */
 /*  ================ */
 /* > */
 /* >       K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */
 /* >       Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 */
 /* >       Performance, SIAM Journal of Matrix Analysis, volume 23, pages */
 /* >       929--947, 2002. */
 /* > \n */
 /* >       K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */
 /* >       Algorithm Part II: Aggressive Early Deflation, SIAM Journal */
 /* >       of Matrix Analysis, volume 23, pages 948--973, 2002. */

 /*  ===================================================================== */
 /* Subroutine */ int shseqr_(char *job, char *compz, integer *n, integer *ilo,
 	 integer *ihi, real *h__, integer *ldh, real *wr, real *wi, real *z__,
 	 integer *ldz, real *work, integer *lwork, integer *info)
 {
     /*
      * Driver for the Hessenberg QR eigenvalue computation: validates the
      * arguments, answers workspace queries, copies the eigenvalues lying
      * outside rows/columns ILO..IHI, and then hands the active block to
      * SLAHQR (small matrices) or SLAQR0 (large matrices).
      *
      * All arrays are addressed 0-based here, so Fortran element H(i,j) is
      * h__[(i-1) + (j-1)*ld_h] and WR(i) is wr[i-1].
      *
      * c__2, c__12, c__49, c_b11 and c_b12 are file-level constants defined
      * outside this block.  NOTE(review): c_b11/c_b12 presumably hold 0.f and
      * 1.f (ZERO/ONE of the Fortran source) and c__49 holds 49 -- confirm.
      */

     /* ---- routines defined in other translation units ---- */
     extern logical lsame_(char *, char *);
     extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
             integer *, integer *, ftnlen, ftnlen);
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
     extern /* Subroutine */ int slaqr0_(logical *, logical *, integer *,
             integer *, integer *, real *, integer *, real *, real *,
             integer *, integer *, real *, integer *, real *, integer *,
             integer *);
     extern /* Subroutine */ int slahqr_(logical *, logical *, integer *,
             integer *, integer *, real *, integer *, real *, real *,
             integer *, integer *, real *, integer *, integer *);
     extern /* Subroutine */ int slacpy_(char *, integer *, integer *,
             real *, integer *, real *, integer *);
     extern /* Subroutine */ int slaset_(char *, integer *, integer *,
             real *, real *, real *, integer *);

     /* ---- local scratch for the padded small-matrix retry (NL = 49) ---- */
     real pad_h[2401];      /* 49-by-49, column-major */
     real pad_work[49];

     /* ---- other locals ---- */
     const integer ld_h = *ldh;
     logical wantt, wantz, initz, lquery;
     integer min_dim, crossover, kbot, k, bound;
     integer rows, cols, pad_cols, argpos;
     real ws_floor;
     address parts[2];
     integer part_len[2];
     char jbcmpz[2];

     /* ==== Decode the option characters. ==== */
     wantt = lsame_(job, "S");
     initz = lsame_(compz, "I");
     wantz = initz || lsame_(compz, "V");
     work[0] = (real) f2cmax(1,*n);
     lquery = *lwork == -1;

     /* ==== Check the input parameters; first failure wins. ==== */
     min_dim = f2cmax(1,*n);
     *info = 0;
     if (! lsame_(job, "E") && ! wantt) {
         *info = -1;
     } else if (! lsame_(compz, "N") && ! wantz) {
         *info = -2;
     } else if (*n < 0) {
         *info = -3;
     } else if (*ilo < 1 || *ilo > min_dim) {
         *info = -4;
     } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) {
         *info = -5;
     } else if (*ldh < min_dim) {
         *info = -7;
     } else if (*ldz < 1 || (wantz && *ldz < min_dim)) {
         *info = -11;
     } else if (! lquery && *lwork < min_dim) {
         *info = -13;
     }

     /* ==== Invalid argument: report its position and leave. ==== */
     if (*info != 0) {
         argpos = -(*info);
         xerbla_("SHSEQR", &argpos, (ftnlen)6);
         return 0;
     }

     /* ==== N = 0: nothing to do. ==== */
     if (*n == 0) {
         return 0;
     }

     /* ==== Workspace query: let SLAQR0 fill WORK(1), but never report */
     /* .    less than max(1,N) (backward compatibility). ==== */
     if (lquery) {
         slaqr0_(&wantt, &wantz, n, ilo, ihi, h__, ldh, wr, wi, ilo, ihi,
                 z__, ldz, work, lwork, info);
         ws_floor = (real) f2cmax(1,*n);
         work[0] = f2cmax(ws_floor,work[0]);
         return 0;
     }

     /* ==== Eigenvalues isolated by SGEBAL: diagonal entries before ILO */
     /* .    and after IHI are copied out as real eigenvalues. ==== */
     bound = *ilo - 1;
     for (k = 0; k < bound; ++k) {
         wr[k] = h__[k + k * ld_h];
         wi[k] = 0.f;
     }
     bound = *n;
     for (k = *ihi; k < bound; ++k) {
         wr[k] = h__[k + k * ld_h];
         wi[k] = 0.f;
     }

     /* ==== Initialize Z, if requested ==== */
     if (initz) {
         slaset_("A", n, n, &c_b11, &c_b12, z__, ldz);
     }

     /* ==== A 1-by-1 active block is already an eigenvalue. ==== */
     if (*ilo == *ihi) {
         k = *ilo - 1;
         wr[k] = h__[k + k * ld_h];
         wi[k] = 0.f;
         return 0;
     }

     /* ==== SLAHQR/SLAQR0 crossover point: ILAENV is asked with the */
     /* .    two-character option string JOB//COMPZ; result is at least 15. ==== */
     part_len[0] = 1;
     parts[0] = job;
     part_len[1] = 1;
     parts[1] = compz;
     s_cat(jbcmpz, parts, part_len, &c__2, (ftnlen)2);
     crossover = ilaenv_(&c__12, "SHSEQR", jbcmpz, n, ilo, ihi, lwork,
             (ftnlen)6, (ftnlen)2);
     crossover = f2cmax(15,crossover);

     if (*n > crossover) {

         /* ==== Big matrix: SLAQR0 ==== */
         slaqr0_(&wantt, &wantz, n, ilo, ihi, h__, ldh, wr, wi, ilo, ihi,
                 z__, ldz, work, lwork, info);

     } else {

         /* ==== Small matrix: SLAHQR first ==== */
         slahqr_(&wantt, &wantz, n, ilo, ihi, h__, ldh, wr, wi, ilo, ihi,
                 z__, ldz, info);

         if (*info > 0) {

             /* ==== A rare SLAHQR failure; retry with SLAQR0 on the */
             /* .    unconverged part, whose last row is the returned INFO. ==== */
             kbot = *info;

             if (*n >= 49) {

                 /* ==== Enough subdiagonal scratch space to call SLAQR0 */
                 /* .    directly on H. ==== */
                 slaqr0_(&wantt, &wantz, n, ilo, &kbot, h__, ldh, wr, wi,
                         ilo, ihi, z__, ldz, work, lwork, info);

             } else {

                 /* ==== Tiny matrix: copy H into the 49-by-49 local array, */
                 /* .    zero HL(N+1,N) and columns N+1..49, run SLAQR0 on */
                 /* .    the padded copy, then copy back if required. ==== */
                 slacpy_("A", n, n, h__, ldh, pad_h, &c__49);
                 pad_h[*n + (*n - 1) * 49] = 0.f;
                 pad_cols = 49 - *n;
                 slaset_("A", &c__49, &pad_cols, &c_b11, &c_b11,
                         &pad_h[*n * 49], &c__49);
                 slaqr0_(&wantt, &wantz, &c__49, ilo, &kbot, pad_h, &c__49,
                         wr, wi, ilo, ihi, z__, ldz, pad_work, &c__49, info);
                 if (wantt || *info != 0) {
                     slacpy_("A", n, n, pad_h, &c__49, h__, ldh);
                 }
             }
         }
     }

     /* ==== Clear out the trash below the first subdiagonal: the */
     /* .    (N-2)-by-(N-2) lower triangle starting at H(3,1). ==== */
     if ((wantt || *info != 0) && *n > 2) {
         rows = *n - 2;
         cols = *n - 2;
         slaset_("L", &rows, &cols, &c_b11, &c_b11, &h__[2], ldh);
     }

     /* ==== Reported workspace size is never below max(1,N) */
     /* .    (backward compatibility with previous LAPACK versions). ==== */
     ws_floor = (real) f2cmax(1,*n);
     work[0] = f2cmax(ws_floor,work[0]);

     /* ==== End of SHSEQR ==== */
     return 0;
 } /* shseqr_ */

--- a/lapack-netlib/SRC/sisnan.c
+++ b/lapack-netlib/SRC/sisnan.c
@@ -0,0 +1,469 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 /* <complex.h> may define `complex` and `I` as macros.  Remove them so that */
 /* `complex` can be used as a struct typedef name below.  (#undef of a name */
 /* that is not a macro is a no-op.) */
 #undef complex
 #undef I

 /* ---- Scalar types used by the translated Fortran code ---- */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;

 /* Complex numbers stored as a (real, imaginary) pair. */
 typedef struct {
     real r;
     real i;
 } complex;

 typedef struct {
     doublereal r;
     doublereal i;
 } doublecomplex;

 /* Value of *z as a C99 single-precision complex number. */
 static inline _Complex float Cf(complex *z)
 {
     return z->r + z->i*_Complex_I;
 }

 /* Value of *z as a C99 double-precision complex number. */
 static inline _Complex double Cd(doublecomplex *z)
 {
     return z->r + z->i*_Complex_I;
 }

 /* Reinterpret a complex struct pointer as a pointer to C99 complex. */
 static inline _Complex float * _pCf(complex *z)
 {
     return (_Complex float*)z;
 }

 static inline _Complex double * _pCd(doublecomplex *z)
 {
     return (_Complex double*)z;
 }

 /* Lvalue views of a struct through the pointer casts above. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))

 /* ---- Logical and one-byte types ---- */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff: parameter blocks, one per kind of Fortran I/O statement. */

 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* external read, write */
 typedef struct {
     flag cierr;
     ftnint ciunit;
     flag ciend;
     char *cifmt;
     ftnint cirec;
 } cilist;

 /* internal read, write */
 typedef struct {
     flag icierr;
     char *iciunit;
     flag iciend;
     char *icifmt;
     ftnint icirlen;
     ftnint icirnum;
 } icilist;

 /* open */
 typedef struct {
     flag oerr;
     ftnint ounit;
     char *ofnm;
     ftnlen ofnmlen;
     char *osta;
     char *oacc;
     char *ofm;
     ftnint orl;
     char *oblnk;
 } olist;

 /* close */
 typedef struct {
     flag cerr;
     ftnint cunit;
     char *csta;
 } cllist;

 /* rewind, backspace, endfile */
 typedef struct {
     flag aerr;
     ftnint aunit;
 } alist;

 /* inquire */
 typedef struct {
     flag inerr;
     ftnint inunit;
     char *infile;
     ftnlen infilen;
     /* the remaining members are the parameters in standard's order */
     ftnint *inex;
     ftnint *inopen;
     ftnint *innum;
     ftnint *innamed;
     char *inname;
     ftnlen innamlen;
     char *inacc;
     ftnlen inacclen;
     char *inseq;
     ftnlen inseqlen;
     char *indir;
     ftnlen indirlen;
     char *infmt;
     ftnlen infmtlen;
     char *inform;
     ftnint informlen;
     char *inunf;
     ftnlen inunflen;
     ftnint *inrecl;
     ftnint *innrec;
     char *inblank;
     ftnlen inblanklen;
 } inlist;

 #define VOID void

 /* Overlay of the scalar types (original note: for multiple entry points). */
 typedef union Multitype {
     integer1 g;
     shortint h;
     integer i;
     /* longint j; */
     real r;
     doublereal d;
     complex c;
     doublecomplex z;
 } Multitype;

 /* One variable entry of a Namelist (original note: for Namelist). */
 typedef struct Vardesc {
     char *name;
     char *addr;
     ftnlen *dims;
     int type;
 } Vardesc;

 /* A named list of nvars variable descriptors. */
 typedef struct Namelist {
     char *name;
     Vardesc **vars;
     int nvars;
 } Namelist;

 /* ---- Arithmetic helper macros ---- */
 /* All of these are plain textual macros: an argument may be evaluated more */
 /* than once, so arguments with side effects must be avoided.  Note that */
 /* `abs` defined here takes over any later function-style use of abs(). */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 /* Bit helpers: test, clear or set bit number b of a. */
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* ---- Macro replacements for run-time support routines ---- */
 /* Naming pattern visible below: c_/r_ variants work on single precision */
 /* (complex/real), z_/d_ variants on double precision.  Macros whose body is */
 /* wrapped in { } are statements, not expressions.  Pointer arguments are */
 /* dereferenced by the macro. */
 /* abort_ goes through sig_die, which (see below) ignores its arguments and */
 /* calls exit(1). */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 /* NOTE(review): c_cos calls the double-precision ccos while its neighbours */
 /* (c_exp, c_log, c_sin, c_sqrt) call the f-suffixed functions -- confirm */
 /* whether ccosf was intended. */
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 /* Positive difference: *a - *b when *a > *b, otherwise 0. */
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* Truncation toward zero, built from floor(). */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* Base-10 logarithm as log(x) times the constant 0.4342944819... */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* Nearest integer with halves rounded away from zero (value argument). */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* |a| carrying the sign of b; b >= 0 counts as positive (value arguments). */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 /* i_len ignores the string and returns the passed length. */
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Power helpers; the *_ui functions are the static helpers defined below. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): pow_zz converts A with Cd() but passes *(B) unconverted to */
 /* cpow.  If B is a doublecomplex pointer (a struct), this would not compile */
 /* -- Cd(B) may have been intended.  Confirm against any callers. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* s_cat: concatenates *np pieces, rpp[i] of length rnp[i], into lpp, which */
 /* holds llp characters; pieces are truncated once lpp is full and unused */
 /* space is filled with blanks.  No terminating NUL is written.  The macro */
 /* is a block that declares its own locals i, nc, ll, f__rp and lp, so it */
 /* can only be used as a statement. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the shorter of the two given lengths only. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copies at most min(C,D) characters from B to A and stops early at */
 /* a NUL in B; it neither pads A nor writes a terminator. */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* sig_die and s_stop ignore their arguments and terminate the process */
 /* (exit status 1 and 0 respectively). */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Version string; not referenced by any code visible in this header. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* myexit_/mycycle expand to break/continue, so they only make sense inside */
 /* a loop (or, for break, a switch). */
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): the next three expand to a brace-wrapped expression with no */
 /* terminating semicolon inside the braces; as written they look usable only */
 /* as initializers, if at all -- confirm whether anything relies on them. */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning logical.  The parameter list is */
 /* variadic under C++ and left unspecified under C. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /*
  * spow_ui -- x raised to the integer power n, by binary exponentiation
  * (repeated squaring), in single precision.
  *
  *   x  base
  *   n  exponent; a negative n yields (1/x) raised to |n|
  *
  * Returns 1 when n == 0.
  */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;

 	if (n == 0) return result;
 	if (n < 0) x = 1/x;
 	/* |n| is formed in unsigned arithmetic: negating the most negative */
 	/* integer as a signed value would overflow (undefined behaviour). */
 	bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* square only while exponent bits remain */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /*
  * dpow_ui -- x raised to the integer power n, by binary exponentiation
  * (repeated squaring), in double precision.
  *
  *   x  base
  *   n  exponent; a negative n yields (1/x) raised to |n|
  *
  * Returns 1 when n == 0.
  */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;

 	if (n == 0) return result;
 	if (n < 0) x = 1/x;
 	/* |n| is formed in unsigned arithmetic: negating the most negative */
 	/* integer as a signed value would overflow (undefined behaviour). */
 	bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* square only while exponent bits remain */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /*
  * cpow_ui -- single-precision complex x raised to the integer power n, by
  * binary exponentiation (repeated squaring).
  *
  *   x  base
  *   n  exponent; a negative n yields (1/x) raised to |n|
  *
  * Returns 1 when n == 0.
  */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;

 	if (n == 0) return result;
 	if (n < 0) x = 1/x;
 	/* |n| is formed in unsigned arithmetic: negating the most negative */
 	/* integer as a signed value would overflow (undefined behaviour). */
 	bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* square only while exponent bits remain */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /*
  * zpow_ui -- double-precision complex x raised to the integer power n, by
  * binary exponentiation (repeated squaring).
  *
  *   x  base
  *   n  exponent; a negative n yields (1/x) raised to |n|
  *
  * Returns 1 when n == 0.
  */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;

 	if (n == 0) return result;
 	if (n < 0) x = 1/x;
 	/* |n| is formed in unsigned arithmetic: negating the most negative */
 	/* integer as a signed value would overflow (undefined behaviour). */
 	bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
 	for (;;) {
 		if (bits & 01) result *= x;
 		/* square only while exponent bits remain */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /*
  * pow_ii -- integer x raised to the integer power n.
  *
  *   n == 0 or x == 1     : 1
  *   n <  0, x == -1      : +1 or -1 according to the parity of |n|
  *   n <  0, x == 0       : evaluates 1/x, i.e. an integer division by zero,
  *                          exactly as the original code did (there is no
  *                          integer value for this case)
  *   n <  0, other x      : 0 (the true result is a fraction)
  *   n >  0               : x**n by repeated squaring
  */
 static integer pow_ii(integer x, integer n) {
 	integer result;
 	unsigned long int bits;

 	if (n == 0 || x == 1) return 1;
 	if (n < 0 && x != -1) return x == 0 ? 1/x : 0;

 	/* Here n > 0, or n < 0 with x == -1.  |n| is formed in unsigned */
 	/* arithmetic: negating the most negative integer as a signed value */
 	/* would overflow (undefined behaviour). */
 	bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
 	for (result = 1; ; ) {
 		if (bits & 01) result *= x;
 		/* square only while exponent bits remain */
 		if (bits >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /*
  * dmaxloc_ -- position of the largest element among w[s-1] .. w[e-1]
  * (s and e are 1-based, inclusive).  The result is 1-based relative to s;
  * on ties the earliest position wins.  w[s-1] is read even when e < s, in
  * which case 1 is returned.  The parameter n is not used.
  */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double peak = w[s-1];
 	integer k = s + 1;

 	while (k <= e) {
 		if (w[k-1] > peak) {
 			best = k;
 			peak = w[k-1];
 		}
 		k++;
 	}
 	return best - s + 1;
 }
 /*
  * smaxloc_ -- position of the largest element among w[s-1] .. w[e-1]
  * (s and e are 1-based, inclusive).  The result is 1-based relative to s;
  * on ties the earliest position wins.  w[s-1] is read even when e < s, in
  * which case 1 is returned.  The parameter n is not used.
  */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float peak = w[s-1];
 	integer k = s + 1;

 	while (k <= e) {
 		if (w[k-1] > peak) {
 			best = k;
 			peak = w[k-1];
 		}
 		k++;
 	}
 	return best - s + 1;
 }
 /*
  * cdotc_ -- conjugated dot product of two single-precision complex vectors:
  *           *z = sum over k of conj(x_k) * y_k.
  *
  *   z      receives the result (0 when n <= 0)
  *   n_     number of elements
  *   x, y   the vectors
  *   incx_  stride between consecutive elements of x
  *   incy_  stride between consecutive elements of y
  *
  * A negative stride walks that vector backwards starting from element
  * (n-1)*|inc|, following the BLAS convention, so every access stays inside
  * the array the pointer refers to.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float acc = 0.0;

 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++) { /* acc = acc + conjg(x(i)) * y(i) */
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /*
  * zdotc_ -- conjugated dot product of two double-precision complex vectors:
  *           *z = sum over k of conj(x_k) * y_k.
  *
  *   z      receives the result (0 when n <= 0)
  *   n_     number of elements
  *   x, y   the vectors
  *   incx_  stride between consecutive elements of x
  *   incy_  stride between consecutive elements of y
  *
  * A negative stride walks that vector backwards starting from element
  * (n-1)*|inc|, following the BLAS convention, so every access stays inside
  * the array the pointer refers to.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double acc = 0.0;

 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++) { /* acc = acc + conjg(x(i)) * y(i) */
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 /*
  * cdotu_ -- unconjugated dot product of two single-precision complex
  *           vectors: *z = sum over k of x_k * y_k.
  *
  *   z      receives the result (0 when n <= 0)
  *   n_     number of elements
  *   x, y   the vectors
  *   incx_  stride between consecutive elements of x
  *   incy_  stride between consecutive elements of y
  *
  * A negative stride walks that vector backwards starting from element
  * (n-1)*|inc|, following the BLAS convention, so every access stays inside
  * the array the pointer refers to.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex float acc = 0.0;

 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++) { /* acc = acc + x(i) * y(i), no conjugation */
 		acc += Cf(&x[ix]) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /*
  * zdotu_ -- unconjugated dot product of two double-precision complex
  *           vectors: *z = sum over k of x_k * y_k.
  *
  *   z      receives the result (0 when n <= 0)
  *   n_     number of elements
  *   x, y   the vectors
  *   incx_  stride between consecutive elements of x
  *   incy_  stride between consecutive elements of y
  *
  * A negative stride walks that vector backwards starting from element
  * (n-1)*|inc|, following the BLAS convention, so every access stays inside
  * the array the pointer refers to.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = 0, iy = 0;
 	_Complex double acc = 0.0;

 	if (n > 0) {
 		if (incx < 0) ix = (1 - n) * incx;
 		if (incy < 0) iy = (1 - n) * incy;
 	}
 	for (i = 0; i < n; i++) { /* acc = acc + x(i) * y(i), no conjugation */
 		acc += Cd(&x[ix]) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SISNAN tests input for NaN. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SISNAN + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sisnan.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sisnan.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sisnan.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       LOGICAL FUNCTION SISNAN( SIN ) */

 /*       REAL SIN */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SISNAN returns .TRUE. if its argument is NaN, and .FALSE. */
 /* > otherwise.  To be replaced by the Fortran 2003 intrinsic in the */
 /* > future. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] SIN */
 /* > \verbatim */
 /* >          SIN is REAL */
 /* >          Input to test for NaN. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2017 */

 /* > \ingroup OTHERauxiliary */

 /*  ===================================================================== */
 logical sisnan_(real *sin__)
 {
     /* Helper defined in another translation unit. */
     extern logical slaisnan_(real *, real *);

 /*  -- LAPACK auxiliary routine (version 3.7.1) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2017 */

 /*  ===================================================================== */

     /* The NaN test is delegated to SLAISNAN, which receives the same */
     /* value as both of its arguments; its answer is returned unchanged. */
     return slaisnan_(sin__, sin__);
 } /* sisnan_ */

--- a/lapack-netlib/SRC/sla_gbamv.c
+++ b/lapack-netlib/SRC/sla_gbamv.c
@@ -0,0 +1,815 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar type aliases used throughout the f2c-translated code. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers are carried as plain { real part, imaginary part } structs. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd build a C99 _Complex value from the struct members. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd reinterpret a struct pointer as a pointer to the C99 complex type; */
 /* pCf/pCd dereference that pointer so the result can be assigned to. */
 /* NOTE(review): this assumes the struct and the _Complex type share a layout. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Values stored in the logical types above. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer aliases used by the I/O descriptor structs and for string lengths. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* Parameter blocks for the I/O statements named in the comment above each */
 /* struct.  SLA_GBAMV, the only routine in this translation unit, does not */
 /* reference any of them. */

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Each char* result field is paired with an ftnlen/ftnint length field. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* Union able to hold one value of any of the scalar/complex types above. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of one namelist variable: name, address, dims and a type code. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Small generic helpers.  All are function-like macros that expand their */
 /* arguments textually, so an argument may be evaluated more than once; do */
 /* not pass expressions with side effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro stand-ins for the runtime helpers that f2c-generated code calls. */
 /* Most of them dereference their (pointer) arguments, as each expansion shows; */
 /* the complex ones store their result through the first argument. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* u_sign(a,b): magnitude of a with the sign chosen by the test (b) >= 0. */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): pow_zz hands *(B) to cpow without the Cd() conversion that is */
 /* applied to A -- confirm the type of B at any call site. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* s_cat concatenates *(np) strings rpp[i] (lengths rnp[i]) into lpp, never */
 /* writing more than llp characters, and fills the remainder with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp compares only the first min(c,d) characters; s_copy copies at most */
 /* min(C,D) characters, stops early at a NUL in B and does not pad A. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* sig_die ignores both arguments and exits with status 1; s_stop exits with 0. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): myceiling, myhuge and mymaxloc expand to a brace-enclosed */
 /* block rather than an expression; mymaxloc always dispatches to dmaxloc_. */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning logical.  The argument list is left */
 /* unspecified: "(...)" when compiled as C++, empty parentheses in C. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Single-precision x**n for an integer exponent, by repeated squaring.
    A negative n computes (1/x)**|n|.  |n| is formed in unsigned arithmetic
    so that the most negative integer does not overflow when negated. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int u;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - (unsigned long int)n;	/* |n| without signed overflow */
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Double-precision x**n for an integer exponent, by repeated squaring.
    A negative n computes (1/x)**|n|.  |n| is formed in unsigned arithmetic
    so that the most negative integer does not overflow when negated. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int u;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - (unsigned long int)n;	/* |n| without signed overflow */
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Single-precision complex x**n for an integer exponent, by repeated squaring.
    A negative n computes (1/x)**|n|.  |n| is formed in unsigned arithmetic
    so that the most negative integer does not overflow when negated. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int u;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - (unsigned long int)n;	/* |n| without signed overflow */
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Double-precision complex x**n for an integer exponent, by repeated squaring.
    A negative n computes (1/x)**|n|.  |n| is formed in unsigned arithmetic
    so that the most negative integer does not overflow when negated. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int u;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - (unsigned long int)n;	/* |n| without signed overflow */
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Integer power x**n.
    n == 0 or x == 1 gives 1.  For n < 0: x == -1 gives +/-1 according to the
    parity of n, x == 0 evaluates 1/x exactly as the original did (an integer
    division by zero), and every other x gives 0.  Positive exponents use
    repeated squaring.  |n| is formed in unsigned arithmetic so that the most
    negative integer does not overflow when negated. */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		u = 0UL - (unsigned long int)n;	/* x == -1: only the parity of |n| matters */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Position, counted from 1 relative to s, of the first largest element among
    w[s-1] .. w[e-1] (s and e are 1-based, inclusive).  The argument n is not
    used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position, counted from 1 relative to s, of the first largest element among
    w[s-1] .. w[e-1] (s and e are 1-based, inclusive).  The argument n is not
    used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum over i of conj(x_i) * y_i, for *n_ elements
    taken with strides *incx_ and *incy_.  As in the reference BLAS, a negative
    stride walks the vector backwards starting from offset (1-n)*inc, so no
    element before the start of the array is touched.  *z is set to 0 when
    n <= 0. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i))* y(i) */
 			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
 		}
 	} else {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(ix))* y(iy) */
 			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Conjugated dot product: *z = sum over i of conj(x_i) * y_i, for *n_ elements
    taken with strides *incx_ and *incy_.  As in the reference BLAS, a negative
    stride walks the vector backwards starting from offset (1-n)*inc, so no
    element before the start of the array is touched.  *z is set to 0 when
    n <= 0. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
 			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
 		}
 	} else {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(ix))* y(iy) */
 			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }
 /* Unconjugated dot product: *z = sum over i of x_i * y_i, for *n_ elements
    taken with strides *incx_ and *incy_.  As in the reference BLAS, a negative
    stride walks the vector backwards starting from offset (1-n)*inc, so no
    element before the start of the array is touched.  *z is set to 0 when
    n <= 0. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex float zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 			zdotc += Cf(&x[i]) * Cf(&y[i]);
 		}
 	} else {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(ix)* y(iy) */
 			zdotc += Cf(&x[ix]) * Cf(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCf(z) = zdotc;
 }
 /* Unconjugated dot product: *z = sum over i of x_i * y_i, for *n_ elements
    taken with strides *incx_ and *incy_.  As in the reference BLAS, a negative
    stride walks the vector backwards starting from offset (1-n)*inc, so no
    element before the start of the array is touched.  *z is set to 0 when
    n <= 0. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix, iy;
 	_Complex double zdotc = 0.0;
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i) */
 			zdotc += Cd(&x[i]) * Cd(&y[i]);
 		}
 	} else {
 		ix = incx < 0 ? (1 - n) * incx : 0;
 		iy = incy < 0 ? (1 - n) * incy : 0;
 		for (i=0;i<n;i++) { /* zdotc = zdotc + x(ix)* y(iy) */
 			zdotc += Cd(&x[ix]) * Cd(&y[iy]);
 			ix += incx;
 			iy += incy;
 		}
 	}
 	pCd(z) = zdotc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SLA_GBAMV performs a matrix-vector operation to calculate error bounds. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SLA_GBAMV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sla_gbamv.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sla_gbamv.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sla_gbamv.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SLA_GBAMV( TRANS, M, N, KL, KU, ALPHA, AB, LDAB, X, */
 /*                             INCX, BETA, Y, INCY ) */

 /*       REAL               ALPHA, BETA */
 /*       INTEGER            INCX, INCY, LDAB, M, N, KL, KU, TRANS */
 /*       REAL               AB( LDAB, * ), X( * ), Y( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SLA_GBAMV  performs one of the matrix-vector operations */
 /* > */
 /* >         y := alpha*abs(A)*abs(x) + beta*abs(y), */
 /* >    or   y := alpha*abs(A)**T*abs(x) + beta*abs(y), */
 /* > */
 /* > where alpha and beta are scalars, x and y are vectors and A is an */
 /* > m by n matrix. */
 /* > */
 /* > This function is primarily used in calculating error bounds. */
 /* > To protect against underflow during evaluation, components in */
 /* > the resulting vector are perturbed away from zero by (N+1) */
 /* > times the underflow threshold.  To prevent unnecessarily large */
 /* > errors for block-structure embedded in general matrices, */
 /* > "symbolically" zero components are not perturbed.  A zero */
 /* > entry is considered "symbolic" if all multiplications involved */
 /* > in computing that entry have at least one zero multiplicand. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is INTEGER */
 /* >           On entry, TRANS specifies the operation to be performed as */
 /* >           follows: */
 /* > */
 /* >             BLAS_NO_TRANS      y := alpha*abs(A)*abs(x) + beta*abs(y) */
 /* >             BLAS_TRANS         y := alpha*abs(A**T)*abs(x) + beta*abs(y) */
 /* >             BLAS_CONJ_TRANS    y := alpha*abs(A**T)*abs(x) + beta*abs(y) */
 /* > */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >           On entry, M specifies the number of rows of the matrix A. */
 /* >           M must be at least zero. */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >           On entry, N specifies the number of columns of the matrix A. */
 /* >           N must be at least zero. */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KL */
 /* > \verbatim */
 /* >          KL is INTEGER */
 /* >           The number of subdiagonals within the band of A.  KL >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KU */
 /* > \verbatim */
 /* >          KU is INTEGER */
 /* >           The number of superdiagonals within the band of A.  KU >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ALPHA */
 /* > \verbatim */
 /* >          ALPHA is REAL */
 /* >           On entry, ALPHA specifies the scalar alpha. */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AB */
 /* > \verbatim */
 /* >          AB is REAL array, dimension ( LDAB, n ) */
 /* >           Before entry, the leading m by n part of the array AB must */
 /* >           contain the matrix of coefficients. */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >           On entry, LDAB specifies the first dimension of AB as declared */
 /* >           in the calling (sub) program. LDAB must be at least */
 /* >           KL + KU + 1. */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] X */
 /* > \verbatim */
 /* >          X is REAL array, dimension */
 /* >           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
 /* >           and at least */
 /* >           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
 /* >           Before entry, the incremented array X must contain the */
 /* >           vector x. */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] INCX */
 /* > \verbatim */
 /* >          INCX is INTEGER */
 /* >           On entry, INCX specifies the increment for the elements of */
 /* >           X. INCX must not be zero. */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] BETA */
 /* > \verbatim */
 /* >          BETA is REAL */
 /* >           On entry, BETA specifies the scalar beta. When BETA is */
 /* >           supplied as zero then Y need not be set on input. */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] Y */
 /* > \verbatim */
 /* >          Y is REAL array, dimension */
 /* >           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
 /* >           and at least */
 /* >           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
 /* >           Before entry with BETA non-zero, the incremented array Y */
 /* >           must contain the vector y. On exit, Y is overwritten by the */
 /* >           updated vector y. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] INCY */
 /* > \verbatim */
 /* >          INCY is INTEGER */
 /* >           On entry, INCY specifies the increment for the elements of */
 /* >           Y. INCY must not be zero. */
 /* >           Unchanged on exit. */
 /* > */
 /* >  Level 2 Blas routine. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2017 */

 /* > \ingroup realGBcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int sla_gbamv_(integer *trans, integer *m, integer *n, 
 	integer *kl, integer *ku, real *alpha, real *ab, integer *ldab, real *
 	x, integer *incx, real *beta, real *y, integer *incy)
 {
     /* External routines */
     extern integer ilatrans_(char *);
     extern real slamch_(char *);
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);

     /* Local variables */
     integer ld;			/* leading dimension of AB */
     integer info;
     integer lenx, leny;		/* logical lengths of x and y */
     integer kx, ky;		/* 1-based start positions in X and Y */
     integer kd, ke;		/* band row offsets: KU+1 and KL+1 */
     integer row, col, col_lo, col_hi;
     integer iy, jx;
     logical notrans, unit_incx, symb_zero;
     real safe1, band_abs, v;


 /*  -- LAPACK computational routine (version 3.7.1) -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 /*     June 2017 */

 /*  ===================================================================== */

     /* Shift the base pointers so that the 1-based subscripts of the */
     /* Fortran original, AB(i,j), X(i) and Y(i), can be used unchanged. */
     ld = *ldab;
     ab -= 1 + ld;
     --x;
     --y;

     /* Validate the arguments; the first failing test determines INFO. */
     info = 0;
     if (*trans != ilatrans_("N") && *trans != ilatrans_("T")
 	    && *trans != ilatrans_("C")) {
 	info = 1;
     } else if (*m < 0) {
 	info = 2;
     } else if (*n < 0) {
 	info = 3;
     } else if (*kl < 0 || *kl > *m - 1) {
 	info = 4;
     } else if (*ku < 0 || *ku > *n - 1) {
 	info = 5;
     } else if (*ldab < *kl + *ku + 1) {
 	info = 6;
     } else if (*incx == 0) {
 	info = 8;
     } else if (*incy == 0) {
 	info = 11;
     }
     if (info != 0) {
 	xerbla_("SLA_GBAMV ", &info, (ftnlen)10);
 	return 0;
     }

     /* Nothing to do for an empty matrix, or when y would be left as is. */
     if (*m == 0 || *n == 0 || (*alpha == 0.f && *beta == 1.f)) {
 	return 0;
     }

     /* Vector lengths depend on whether A or its transpose is applied. */
     if (*trans == ilatrans_("N")) {
 	lenx = *n;
 	leny = *m;
     } else {
 	lenx = *m;
 	leny = *n;
     }

     /* With a negative increment the traversal starts at the far end. */
     kx = (*incx > 0) ? 1 : 1 - (lenx - 1) * *incx;
     ky = (*incy > 0) ? 1 : 1 - (leny - 1) * *incy;

     /* SAFE1 is (N+1) times the safe minimum reported by SLAMCH. */
     safe1 = slamch_("Safe minimum");
     safe1 = (*n + 1) * safe1;

     kd = *ku + 1;
     ke = *kl + 1;
     unit_incx = (*incx == 1);
     notrans = (*trans == ilatrans_("N"));

     /* Form  y := alpha*abs(A)*abs(x) + beta*abs(y), one entry of y per pass. */
     /* An entry stays a "symbolic" zero, and is then not perturbed, as long */
     /* as every product that feeds it has at least one zero factor. */
     iy = ky;
     for (row = 1; row <= leny; ++row) {
 	if (*beta == 0.f) {
 	    symb_zero = TRUE_;
 	    y[iy] = 0.f;
 	} else if (y[iy] == 0.f) {
 	    symb_zero = TRUE_;
 	} else {
 	    symb_zero = FALSE_;
 	    v = y[iy];
 	    y[iy] = *beta * abs(v);
 	}

 	if (*alpha != 0.f) {
 	    col_lo = f2cmax(row - *kl, 1);
 	    col_hi = f2cmin(row + *ku, lenx);
 	    /* For a non-unit increment X is read from KX onwards on every */
 	    /* row; for INCX == 1 it is indexed directly by the column. */
 	    jx = kx;
 	    for (col = col_lo; col <= col_hi; ++col) {
 		v = notrans ? ab[kd + row - col + col * ld]
 			    : ab[ke - row + col + row * ld];
 		band_abs = abs(v);
 		v = unit_incx ? x[col] : x[jx];
 		symb_zero = symb_zero && (v == 0.f || band_abs == 0.f);
 		y[iy] += *alpha * abs(v) * band_abs;
 		jx += *incx;
 	    }
 	}

 	/* Push a non-symbolic result away from zero by SAFE1. */
 	if (! symb_zero) {
 	    y[iy] += r_sign(&safe1, &y[iy]);
 	}
 	iy += *incy;
     }

     return 0;

 /*     End of SLA_GBAMV */

 } /* sla_gbamv_ */

--- a/lapack-netlib/SRC/sla_gbrcond.c
+++ b/lapack-netlib/SRC/sla_gbrcond.c
@@ -0,0 +1,791 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar type aliases used throughout the f2c-translated code. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers are carried as plain { real part, imaginary part } structs. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf/Cd build a C99 _Complex value from the struct members. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf/_pCd reinterpret a struct pointer as a pointer to the C99 complex type; */
 /* pCf/pCd dereference that pointer so the result can be assigned to. */
 /* NOTE(review): this assumes the struct and the _Complex type share a layout. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Values stored in the logical types above. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer aliases used by the I/O descriptor structs and for string lengths. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* Parameter blocks for the I/O statements named in the comment above each */
 /* struct. */

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Each char* result field is paired with an ftnlen/ftnint length field. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* Union able to hold one value of any of the scalar/complex types above. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Descriptor of one namelist variable: name, address, dims and a type code. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Small generic helpers.  All are function-like macros that expand their */
 /* arguments textually, so an argument may be evaluated more than once; do */
 /* not pass expressions with side effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Macro stand-ins for the runtime helpers that f2c-generated code calls. */
 /* Most of them dereference their (pointer) arguments, as each expansion shows; */
 /* the complex ones store their result through the first argument. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* u_sign(a,b): magnitude of a with the sign chosen by the test (b) >= 0. */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* NOTE(review): pow_zz hands *(B) to cpow without the Cd() conversion that is */
 /* applied to A -- confirm the type of B at any call site. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
 /* s_cat concatenates *(np) strings rpp[i] (lengths rnp[i]) into lpp, never */
 /* writing more than llp characters, and fills the remainder with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp compares only the first min(c,d) characters; s_copy copies at most */
 /* min(C,D) characters, stops early at a NUL in B and does not pad A. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* sig_die ignores both arguments and exits with status 1; s_stop exits with 0. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): myceiling, myhuge and mymaxloc expand to a brace-enclosed */
 /* block rather than an expression; mymaxloc always dispatches to dmaxloc_. */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 /* L_fp: pointer to a function returning logical.  The argument list is left */
 /* unspecified: "(...)" when compiled as C++, empty parentheses in C. */
 #define F2C_proc_par_types 1
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Single-precision x**n for an integer exponent, by repeated squaring.
    A negative n computes (1/x)**|n|.  |n| is formed in unsigned arithmetic
    so that the most negative integer does not overflow when negated. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int u;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - (unsigned long int)n;	/* |n| without signed overflow */
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Double-precision x**n for an integer exponent, by repeated squaring.
    A negative n computes (1/x)**|n|.  |n| is formed in unsigned arithmetic
    so that the most negative integer does not overflow when negated. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int u;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - (unsigned long int)n;	/* |n| without signed overflow */
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Single-precision complex x**n for an integer exponent, by repeated squaring.
    A negative n computes (1/x)**|n|.  |n| is formed in unsigned arithmetic
    so that the most negative integer does not overflow when negated. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int u;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - (unsigned long int)n;	/* |n| without signed overflow */
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Double-precision complex x**n for an integer exponent, by repeated squaring.
    A negative n computes (1/x)**|n|.  |n| is formed in unsigned arithmetic
    so that the most negative integer does not overflow when negated. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int u;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - (unsigned long int)n;	/* |n| without signed overflow */
 		x = 1/x;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Integer power x**n.
    n == 0 or x == 1 gives 1.  For n < 0: x == -1 gives +/-1 according to the
    parity of n, x == 0 evaluates 1/x exactly as the original did (an integer
    division by zero), and every other x gives 0.  Positive exponents use
    repeated squaring.  |n| is formed in unsigned arithmetic so that the most
    negative integer does not overflow when negated. */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		if (x != -1) return x == 0 ? 1/x : 0;
 		u = 0UL - (unsigned long int)n;	/* x == -1: only the parity of |n| matters */
 	} else {
 		u = (unsigned long int)n;
 	}
 	for (;;) {
 		if (u & 01) result *= x;	/* current exponent bit is set */
 		if (u >>= 1) x *= x;		/* more bits remain: square the base */
 		else break;
 	}
 	return result;
 }
 /* Position, counted from 1 relative to s, of the first largest element among
    w[s-1] .. w[e-1] (s and e are 1-based, inclusive).  The argument n is not
    used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position, counted from 1 relative to s, of the first largest element among
    w[s-1] .. w[e-1] (s and e are 1-based, inclusive).  The argument n is not
    used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* cdotc_: conjugated dot product, *z = sum over k of conj(x_k) * y_k for
    *n_ elements taken with strides *incx_ and *incy_.  The result is stored
    through z; *n_ <= 0 stores 0. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* BLAS convention: with a negative stride the first logical element
 	   sits at offset (1-n)*inc, so the walk never indexes before the array. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 	}
 	pCf(z) = acc;
 }
 /* zdotc_: conjugated dot product, *z = sum over k of conj(x_k) * y_k for
    *n_ elements taken with strides *incx_ and *incy_.  The result is stored
    through z; *n_ <= 0 stores 0. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* BLAS convention: with a negative stride the first logical element
 	   sits at offset (1-n)*inc, so the walk never indexes before the array. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 	}
 	pCd(z) = acc;
 }
 /* cdotu_: unconjugated dot product, *z = sum over k of x_k * y_k for *n_
    elements taken with strides *incx_ and *incy_.  The result is stored
    through z; *n_ <= 0 stores 0. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* BLAS convention: with a negative stride the first logical element
 	   sits at offset (1-n)*inc, so the walk never indexes before the array. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);
 	}
 	pCf(z) = acc;
 }
 /* zdotu_: unconjugated dot product, *z = sum over k of x_k * y_k for *n_
    elements taken with strides *incx_ and *incy_.  The result is stored
    through z; *n_ <= 0 stores 0. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* BLAS convention: with a negative stride the first logical element
 	   sits at offset (1-n)*inc, so the walk never indexes before the array. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 static integer c__1 = 1;

 /* > \brief \b SLA_GBRCOND estimates the Skeel condition number for a general banded matrix. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SLA_GBRCOND + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sla_gbr
 cond.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sla_gbr
 cond.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sla_gbr
 cond.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       REAL FUNCTION SLA_GBRCOND( TRANS, N, KL, KU, AB, LDAB, AFB, LDAFB, */
 /*                                  IPIV, CMODE, C, INFO, WORK, IWORK ) */

 /*       CHARACTER          TRANS */
 /*       INTEGER            N, LDAB, LDAFB, INFO, KL, KU, CMODE */
 /*       INTEGER            IWORK( * ), IPIV( * ) */
 /*       REAL               AB( LDAB, * ), AFB( LDAFB, * ), WORK( * ), */
 /*      $                   C( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* >    SLA_GBRCOND Estimates the Skeel condition number of  op(A) * op2(C) */
 /* >    where op2 is determined by CMODE as follows */
 /* >    CMODE =  1    op2(C) = C */
 /* >    CMODE =  0    op2(C) = I */
 /* >    CMODE = -1    op2(C) = inv(C) */
 /* >    The Skeel condition number  cond(A) = norminf( |inv(A)||A| ) */
 /* >    is computed by computing scaling factors R such that */
 /* >    diag(R)*A*op2(C) is row equilibrated and computing the standard */
 /* >    infinity-norm condition number. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >     Specifies the form of the system of equations: */
 /* >       = 'N':  A * X = B     (No transpose) */
 /* >       = 'T':  A**T * X = B  (Transpose) */
 /* >       = 'C':  A**H * X = B  (Conjugate Transpose = Transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >     The number of linear equations, i.e., the order of the */
 /* >     matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KL */
 /* > \verbatim */
 /* >          KL is INTEGER */
 /* >     The number of subdiagonals within the band of A.  KL >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KU */
 /* > \verbatim */
 /* >          KU is INTEGER */
 /* >     The number of superdiagonals within the band of A.  KU >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AB */
 /* > \verbatim */
 /* >          AB is REAL array, dimension (LDAB,N) */
 /* >     On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */
 /* >     The j-th column of A is stored in the j-th column of the */
 /* >     array AB as follows: */
 /* >     AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >     The leading dimension of the array AB.  LDAB >= KL+KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AFB */
 /* > \verbatim */
 /* >          AFB is REAL array, dimension (LDAFB,N) */
 /* >     Details of the LU factorization of the band matrix A, as */
 /* >     computed by SGBTRF.  U is stored as an upper triangular */
 /* >     band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */
 /* >     and the multipliers used during the factorization are stored */
 /* >     in rows KL+KU+2 to 2*KL+KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAFB */
 /* > \verbatim */
 /* >          LDAFB is INTEGER */
 /* >     The leading dimension of the array AFB.  LDAFB >= 2*KL+KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >     The pivot indices from the factorization A = P*L*U */
 /* >     as computed by SGBTRF; row i of the matrix was interchanged */
 /* >     with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] CMODE */
 /* > \verbatim */
 /* >          CMODE is INTEGER */
 /* >     Determines op2(C) in the formula op(A) * op2(C) as follows: */
 /* >     CMODE =  1    op2(C) = C */
 /* >     CMODE =  0    op2(C) = I */
 /* >     CMODE = -1    op2(C) = inv(C) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] C */
 /* > \verbatim */
 /* >          C is REAL array, dimension (N) */
 /* >     The vector C in the formula op(A) * op2(C). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >       = 0:  Successful exit. */
 /* >       < 0:  If INFO = -i, the i-th argument is invalid. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (5*N). */
 /* >     Workspace. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (N). */
 /* >     Workspace. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGBcomputational */

 /*  ===================================================================== */
 real sla_gbrcond_(char *trans, integer *n, integer *kl, integer *ku, real *
 	ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, integer *
 	cmode, real *c__, integer *info, real *work, integer *iwork)
 {
     /* Routines defined in other translation units. */
     extern logical lsame_(char *, char *);
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
     extern /* Subroutine */ int slacn2_(integer *, real *, real *, integer *,
 	    real *, integer *, integer *);
     extern /* Subroutine */ int sgbtrs_(char *, integer *, integer *,
 	    integer *, integer *, real *, integer *, integer *, real *,
 	    integer *, integer *);
     /* Handed to slacn2_ on every call (presumably its saved state). */
     integer lacn_state[3];
     integer kase, row, col, first, last, bad_arg;
     integer order, sub, sup, lda;
     real inv_norm, rowsum, entry, term;
     logical notrans;
     /* All arrays are indexed 0-based below; band element (r, c), both */
     /* 1-based, of AB lives at ab[(r - 1) + (c - 1) * lda]. */
     /* Argument checks: on failure *info = -(position of the offending */
     /* argument), xerbla_ is called and 0 is returned. */
     *info = 0;
     notrans = lsame_(trans, "N");
     bad_arg = 0;
     if (! notrans && ! lsame_(trans, "T") && ! lsame_(trans, "C")) {
 	bad_arg = 1;
     } else if (*n < 0) {
 	bad_arg = 2;
     } else if (*kl < 0 || *kl > *n - 1) {
 	bad_arg = 3;
     } else if (*ku < 0 || *ku > *n - 1) {
 	bad_arg = 4;
     } else if (*ldab < *kl + *ku + 1) {
 	bad_arg = 6;
     } else if (*ldafb < (*kl << 1) + *ku + 1) {
 	bad_arg = 8;
     }
     if (bad_arg != 0) {
 	*info = -bad_arg;
 	xerbla_("SLA_GBRCOND", &bad_arg, (ftnlen)11);
 	return 0.f;
     }
     /* NOTE(review): when *n == 0 the KL test above (KL > N-1) fails for */
     /* every KL >= 0, so this quick return looks unreachable -- confirm */
     /* against the reference implementation before relying on it. */
     if (*n == 0) {
 	return 1.f;
     }
     /* Phase 1: for every row i store in work[2n + i - 1] the sum of the */
     /* absolute values of the band entries scaled by op2(C): */
     /*   CMODE == 1 : entry * C(j),  CMODE == 0 : entry,  else : entry / C(j) */
     /* where the entry is AB(KU+1+i-j, j) without transposition and */
     /* AB(KL+1-i+j, i) otherwise. */
     order = *n;
     sub = *kl;
     sup = *ku;
     lda = *ldab;
     for (row = 1; row <= order; ++row) {
 	first = row - sub;
 	if (first < 1) {
 	    first = 1;
 	}
 	last = row + sup;
 	if (last > order) {
 	    last = order;
 	}
 	rowsum = 0.f;
 	for (col = first; col <= last; ++col) {
 	    if (notrans) {
 		entry = ab[(sup + row - col) + (col - 1) * lda];
 	    } else {
 		entry = ab[(sub - row + col) + (row - 1) * lda];
 	    }
 	    if (*cmode == 1) {
 		term = entry * c__[col - 1];
 	    } else if (*cmode == 0) {
 		term = entry;
 	    } else {
 		term = entry / c__[col - 1];
 	    }
 	    rowsum += (term >= 0 ? term : -term);
 	}
 	work[(order << 1) + row - 1] = rowsum;
     }
     /* Phase 2: estimate the norm of inv(op(A)) through the reverse- */
     /* communication routine slacn2_, which is called until it sets */
     /* kase to 0.  work[0..n-1] is the vector it asks us to transform; */
     /* work[n..2n-1] is its own scratch area. */
     inv_norm = 0.f;
     kase = 0;
     for (;;) {
 	slacn2_(n, work + *n, work, iwork, &inv_norm, &kase, lacn_state);
 	if (kase == 0) {
 	    break;
 	}
 	if (kase == 2) {
 	    /* Scale by the row sums, solve with op(A), then undo op2(C). */
 	    for (row = 0; row < *n; ++row) {
 		work[row] *= work[(*n << 1) + row];
 	    }
 	    sgbtrs_(notrans ? "No transpose" : "Transpose", n, kl, ku, &c__1,
 		    afb, ldafb, ipiv, work, n, info);
 	    if (*cmode == 1) {
 		for (row = 0; row < *n; ++row) {
 		    work[row] /= c__[row];
 		}
 	    } else if (*cmode == -1) {
 		for (row = 0; row < *n; ++row) {
 		    work[row] *= c__[row];
 		}
 	    }
 	} else {
 	    /* Same three steps in the opposite order, solving with the */
 	    /* transpose of op(A). */
 	    if (*cmode == 1) {
 		for (row = 0; row < *n; ++row) {
 		    work[row] /= c__[row];
 		}
 	    } else if (*cmode == -1) {
 		for (row = 0; row < *n; ++row) {
 		    work[row] *= c__[row];
 		}
 	    }
 	    sgbtrs_(notrans ? "Transpose" : "No transpose", n, kl, ku, &c__1,
 		    afb, ldafb, ipiv, work, n, info);
 	    for (row = 0; row < *n; ++row) {
 		work[row] *= work[(*n << 1) + row];
 	    }
 	}
     }
     /* The result is the reciprocal of the estimate, or 0 when the */
     /* estimate itself is 0. */
     return (inv_norm != 0.f) ? 1.f / inv_norm : 0.f;
 } /* sla_gbrcond__ */

--- a/lapack-netlib/SRC/sla_gbrfsx_extended.c
+++ b/lapack-netlib/SRC/sla_gbrfsx_extended.c
--- a/lapack-netlib/SRC/sla_gbrpvgrw.c
+++ b/lapack-netlib/SRC/sla_gbrpvgrw.c
@@ -0,0 +1,569 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* Scalar Fortran types expressed as C typedefs. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Single- and double-precision complex numbers stored as {r, i} pairs. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd: return the C99 complex value r + i*_Complex_I built from *z. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd: the same address viewed as a pointer to the C99 complex type
    (a plain pointer cast; NOTE(review): assumes the struct and the C99
    complex type share one memory layout -- confirm for the target ABI). */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 /* pCf / pCd: dereferenced forms of the casts above, usable as assignment targets. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 /* Fortran LOGICAL and the narrower logical/integer variants. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer aliases used by the I/O control structures below; all three are
    plain int. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* The structures that follow are parameter blocks for the Fortran I/O
    statements named in their tags.  NOTE(review): the meaning of each field
    is fixed by the f2c run-time library, which is not part of this file --
    consult it before relying on any field. */

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Parameter block for the INQUIRE statement: a flag and unit number, a
    file name with its length, then pointer/length pairs for the individual
    answers.  NOTE(review): field meanings come from the f2c run-time
    library, which is not part of this file. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 /* Spelling of void used by translated code. */
 #define VOID void

 /* One storage cell able to hold any of the scalar types declared above. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Description of one namelist variable: a name, an address, a dimension
    list and a type code. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptions. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Macro replacements for the f2c run-time library.  Arguments named x, a,
    b, ap, bp, B, E and the like are pointers wherever the expansion
    dereferences them; most macros evaluate their arguments more than once,
    so pass expressions without side effects. */

 /* Generic arithmetic and bit helpers. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Complex and real intrinsics; R receives the result through a pointer. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))

 /* Powers.  pow_zz takes both operands as doublecomplex pointers, so the
    exponent is converted with Cd() like the base. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}

 /* Character helpers and program termination. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}

 /* Loop control and value helpers.  The value-producing macros expand to
    parenthesised expressions so they can appear on the right-hand side of
    an assignment or inside a larger expression. */
 #define myexit_() break;
 #define mycycle() continue;
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 #define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))

 /* procedure parameter types for -A and -C++ */

 /* Marks that the procedure-parameter typedef below has been provided. */
 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning logical.  The parameter list is
    variadic when compiled as C++ and left unspecified when compiled as C. */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* spow_ui: single-precision x**n for an integer exponent, computed by
    repeated squaring.  n == 0 gives 1; n < 0 gives (1/x)**|n|. */
 static float spow_ui(float x, integer n) {
 	float acc = 1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* |n| is formed in unsigned arithmetic because -n overflows
 			   (undefined behaviour) when n is the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) acc *= x;	/* this exponent bit is set: multiply it in */
 			if(u >>= 1) x *= x;	/* bits remain: square the base for the next one */
 			else break;
 		}
 	}
 	return acc;
 }
 /* dpow_ui: double-precision x**n for an integer exponent, computed by
    repeated squaring.  n == 0 gives 1; n < 0 gives (1/x)**|n|. */
 static double dpow_ui(double x, integer n) {
 	double acc = 1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* |n| is formed in unsigned arithmetic because -n overflows
 			   (undefined behaviour) when n is the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) acc *= x;	/* this exponent bit is set: multiply it in */
 			if(u >>= 1) x *= x;	/* bits remain: square the base for the next one */
 			else break;
 		}
 	}
 	return acc;
 }
 /* cpow_ui: single-precision complex x**n for an integer exponent, computed
    by repeated squaring.  n == 0 gives 1; n < 0 gives (1/x)**|n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float acc = 1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* |n| is formed in unsigned arithmetic because -n overflows
 			   (undefined behaviour) when n is the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) acc *= x;	/* this exponent bit is set: multiply it in */
 			if(u >>= 1) x *= x;	/* bits remain: square the base for the next one */
 			else break;
 		}
 	}
 	return acc;
 }
 /* zpow_ui: double-precision complex x**n for an integer exponent, computed
    by repeated squaring.  n == 0 gives 1; n < 0 gives (1/x)**|n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double acc = 1.0; unsigned long int u;
 	if(n != 0) {
 		if(n < 0) {
 			/* |n| is formed in unsigned arithmetic because -n overflows
 			   (undefined behaviour) when n is the most negative integer. */
 			u = 0UL - (unsigned long int)n;
 			x = 1/x;
 		} else {
 			u = (unsigned long int)n;
 		}
 		for(;;) {
 			if(u & 01) acc *= x;	/* this exponent bit is set: multiply it in */
 			if(u >>= 1) x *= x;	/* bits remain: square the base for the next one */
 			else break;
 		}
 	}
 	return acc;
 }
 /* pow_ii: integer power x**n with truncating integer results.
    n == 0 or x == 1 gives 1; for n < 0 the result is 0 unless x is 1 or -1
    (x == -1 gives +1 or -1 according to the parity of n). */
 static integer pow_ii(integer x, integer n) {
 	integer acc; unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		/* x == 0 with n < 0 keeps the original's 1/x, i.e. an integer division
 		   by zero (undefined in C; NOTE(review): looks deliberate, to fault on
 		   0 raised to a negative power -- confirm before changing). */
 		if (x != -1) return x == 0 ? 1/x : 0;
 		/* x == -1: continue with |n|, formed in unsigned arithmetic because
 		   -n overflows when n is the most negative integer. */
 		u = 0UL - (unsigned long int)n;
 	} else {
 		u = (unsigned long int)n;
 	}
 	for(acc = 1; ; ) {
 		if(u & 01) acc *= x;	/* this exponent bit is set: multiply it in */
 		if(u >>= 1) x *= x;	/* bits remain: square the base for the next one */
 		else break;
 	}
 	return acc;
 }
 /* dmaxloc_: position, counted from 1 at index s, of the first largest entry
    among w(s..e) (1-based, inclusive).  The argument n is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer where = s, k;
 	double best = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		/* strict '>' keeps the earliest index on ties */
 		if (w[k-1] > best) {
 			best = w[k-1];
 			where = k;
 		}
 	}
 	return where - s + 1;
 }
 /* smaxloc_: position, counted from 1 at index s, of the first largest entry
    among w(s..e) (1-based, inclusive).  The argument n is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer where = s, k;
 	float best = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		/* strict '>' keeps the earliest index on ties */
 		if (w[k-1] > best) {
 			best = w[k-1];
 			where = k;
 		}
 	}
 	return where - s + 1;
 }
 /* cdotc_: conjugated dot product, *z = sum over k of conj(x_k) * y_k for
    *n_ elements taken with strides *incx_ and *incy_.  The result is stored
    through z; *n_ <= 0 stores 0. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* BLAS convention: with a negative stride the first logical element
 	   sits at offset (1-n)*inc, so the walk never indexes before the array. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 	}
 	pCf(z) = acc;
 }
 /* zdotc_: conjugated dot product, *z = sum over k of conj(x_k) * y_k for
    *n_ elements taken with strides *incx_ and *incy_.  The result is stored
    through z; *n_ <= 0 stores 0. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* BLAS convention: with a negative stride the first logical element
 	   sits at offset (1-n)*inc, so the walk never indexes before the array. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 	}
 	pCd(z) = acc;
 }
 /* cdotu_: unconjugated dot product, *z = sum over k of x_k * y_k for *n_
    elements taken with strides *incx_ and *incy_.  The result is stored
    through z; *n_ <= 0 stores 0. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* BLAS convention: with a negative stride the first logical element
 	   sits at offset (1-n)*inc, so the walk never indexes before the array. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);
 	}
 	pCf(z) = acc;
 }
 /* zdotu_: unconjugated dot product, *z = sum over k of x_k * y_k for *n_
    elements taken with strides *incx_ and *incy_.  The result is stored
    through z; *n_ <= 0 stores 0. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* BLAS convention: with a negative stride the first logical element
 	   sits at offset (1-n)*inc, so the walk never indexes before the array. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SLA_GBRPVGRW computes the reciprocal pivot growth factor norm(A)/norm(U) for a general banded matrix. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SLA_GBRPVGRW + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sla_gbr
 pvgrw.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sla_gbr
 pvgrw.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sla_gbr
 pvgrw.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       REAL FUNCTION SLA_GBRPVGRW( N, KL, KU, NCOLS, AB, LDAB, AFB, */
 /*                                   LDAFB ) */

 /*       INTEGER            N, KL, KU, NCOLS, LDAB, LDAFB */
 /*       REAL               AB( LDAB, * ), AFB( LDAFB, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SLA_GBRPVGRW computes the reciprocal pivot growth factor */
 /* > norm(A)/norm(U). The "max absolute element" norm is used. If this is */
 /* > much less than 1, the stability of the LU factorization of the */
 /* > (equilibrated) matrix A could be poor. This also means that the */
 /* > solution X, estimated condition numbers, and error bounds could be */
 /* > unreliable. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >     The number of linear equations, i.e., the order of the */
 /* >     matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KL */
 /* > \verbatim */
 /* >          KL is INTEGER */
 /* >     The number of subdiagonals within the band of A.  KL >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] KU */
 /* > \verbatim */
 /* >          KU is INTEGER */
 /* >     The number of superdiagonals within the band of A.  KU >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NCOLS */
 /* > \verbatim */
 /* >          NCOLS is INTEGER */
 /* >     The number of columns of the matrix A.  NCOLS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AB */
 /* > \verbatim */
 /* >          AB is REAL array, dimension (LDAB,N) */
 /* >     On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */
 /* >     The j-th column of A is stored in the j-th column of the */
 /* >     array AB as follows: */
 /* >     AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAB */
 /* > \verbatim */
 /* >          LDAB is INTEGER */
 /* >     The leading dimension of the array AB.  LDAB >= KL+KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AFB */
 /* > \verbatim */
 /* >          AFB is REAL array, dimension (LDAFB,N) */
 /* >     Details of the LU factorization of the band matrix A, as */
 /* >     computed by SGBTRF.  U is stored as an upper triangular */
 /* >     band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */
 /* >     and the multipliers used during the factorization are stored */
 /* >     in rows KL+KU+2 to 2*KL+KU+1. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAFB */
 /* > \verbatim */
 /* >          LDAFB is INTEGER */
 /* >     The leading dimension of the array AFB.  LDAFB >= 2*KL+KU+1. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGBcomputational */

 /*  ===================================================================== */
 real sla_gbrpvgrw_(integer *n, integer *kl, integer *ku, integer *ncols, 
 	real *ab, integer *ldab, real *afb, integer *ldafb)
 {
     /* Column strides of the two band arrays, and the band widths. */
     const integer a_stride = *ldab;
     const integer f_stride = *ldafb;
     const integer sup = *ku;
     const integer sub = *kl;
     const integer order = *n;
     const integer cols = *ncols;
     integer col, row, first, last;
     real amax, umax, v, mag, ratio;
     real growth = 1.f;
     /* Arrays are indexed 0-based: band element (r, c), both 1-based, */
     /* lives at index (r - 1) + (c - 1) * stride. */
     for (col = 1; col <= cols; ++col) {
 	first = col - sup;
 	if (first < 1) {
 	    first = 1;
 	}
 	last = col + sub;
 	if (last > order) {
 	    last = order;
 	}
 	/* Largest |AB(KU+1+i-j, j)| for i = max(j-KU,1) .. min(j+KL,N). */
 	amax = 0.f;
 	for (row = first; row <= last; ++row) {
 	    v = ab[(sup + row - col) + (col - 1) * a_stride];
 	    mag = (v >= 0 ? v : -v);
 	    if (mag >= amax) {
 		amax = mag;
 	    }
 	}
 	/* Largest |AFB(KU+1+i-j, j)| for i = max(j-KU,1) .. j. */
 	umax = 0.f;
 	for (row = first; row <= col; ++row) {
 	    v = afb[(sup + row - col) + (col - 1) * f_stride];
 	    mag = (v >= 0 ? v : -v);
 	    if (mag >= umax) {
 		umax = mag;
 	    }
 	}
 	/* Keep the smallest amax/umax ratio seen; columns whose umax is */
 	/* zero are left out. */
 	if (umax != 0.f) {
 	    ratio = amax / umax;
 	    if (ratio <= growth) {
 		growth = ratio;
 	    }
 	}
     }
     return growth;
 } /* sla_gbrpvgrw__ */

--- a/lapack-netlib/SRC/sla_geamv.c
+++ b/lapack-netlib/SRC/sla_geamv.c
@@ -0,0 +1,779 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 /* <complex.h> may define `complex` and `I` as macros; drop them so the
    names can be used for the struct type and for ordinary identifiers.
    (#undef of a name that is not a macro is a no-op.) */
 #undef complex
 #undef I

 /* Fortran scalar types as C types. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;

 /* Complex numbers stored as (real, imaginary) pairs. */
 typedef struct {
 	real r;
 	real i;
 } complex;
 typedef struct {
 	doublereal r;
 	doublereal i;
 } doublecomplex;

 /* Read a pair struct as a native C complex value. */
 static inline _Complex float Cf(complex *z) {
 	_Complex float value = z->r + z->i*_Complex_I;
 	return value;
 }
 static inline _Complex double Cd(doublecomplex *z) {
 	_Complex double value = z->r + z->i*_Complex_I;
 	return value;
 }

 /* Reinterpret a pair struct pointer as a pointer to native complex. */
 static inline _Complex float * _pCf(complex *z) {
 	return (_Complex float*)z;
 }
 static inline _Complex double * _pCd(doublecomplex *z) {
 	return (_Complex double*)z;
 }

 /* Assignable native-complex view of the struct that z points to. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))

 /* Fortran LOGICAL and small integer kinds. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #if !defined(Extern)
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer aliases used by the I/O control blocks below. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* external read, write */
 typedef struct {
 	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /* internal read, write */
 typedef struct {
 	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /* open */
 typedef struct {
 	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /* close */
 typedef struct {
 	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /* rewind, backspace, endfile */
 typedef struct {
 	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire -- parameters in standard's order from inex onwards */
 typedef struct {
 	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint *inex;
 	ftnint *inopen;
 	ftnint *innum;
 	ftnint *innamed;
 	char *inname;
 	ftnlen innamlen;
 	char *inacc;
 	ftnlen inacclen;
 	char *inseq;
 	ftnlen inseqlen;
 	char *indir;
 	ftnlen indirlen;
 	char *infmt;
 	ftnlen infmtlen;
 	char *inform;
 	ftnint informlen;	/* declared ftnint, unlike the other *len fields */
 	char *inunf;
 	ftnlen inunflen;
 	ftnint *inrecl;
 	ftnint *innrec;
 	char *inblank;
 	ftnlen inblanklen;
 } inlist;

 #define VOID void

 /* for multiple entry points: one slot per scalar kind */
 typedef union Multitype {
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 } Multitype;

 /* for Namelist: one described variable */
 typedef struct Vardesc {
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 } Vardesc;

 /* A named group of nvars variable descriptors. */
 typedef struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 } Namelist;

 /* Elementary helpers.  These are function-like macros, so an argument may
    be evaluated more than once: pass operands without side effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Inline replacements for the libf2c runtime entry points.  Arguments are
    pointers, following the Fortran calling convention; the brace-wrapped
    forms are statements, the parenthesised forms are expressions. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* Both operands are doublecomplex pointers, so both go through Cd();
    dereferencing B directly would hand a struct to cpow(). */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 /* The three helpers below yield values, so they expand to parenthesised
    expressions; a brace-wrapped body could only be used as an initializer. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 /* Always dispatches to the double-precision search, dmaxloc_. */
 #define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* Pointer to a function returning logical; the parameter list is left
    open, spelled the way each language requires. */
 #ifndef __cplusplus
 typedef logical (*L_fp)();
 #else
 typedef logical (*L_fp)(...);
 #endif

 /* Raise x to the integer power n by square-and-multiply.
    n == 0 yields 1; a negative n computes (1/x) raised to |n|. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	/* The exponent magnitude is formed in unsigned arithmetic: negating n
 	   as a signed value overflows when n is the most negative integer. */
 	unsigned long int u = (unsigned long int)n;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - u;
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		if (u >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Raise x to the integer power n by square-and-multiply.
    n == 0 yields 1; a negative n computes (1/x) raised to |n|. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	/* The exponent magnitude is formed in unsigned arithmetic: negating n
 	   as a signed value overflows when n is the most negative integer. */
 	unsigned long int u = (unsigned long int)n;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - u;
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		if (u >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Raise the complex value x to the integer power n by square-and-multiply.
    n == 0 yields 1; a negative n computes (1/x) raised to |n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	/* The exponent magnitude is formed in unsigned arithmetic: negating n
 	   as a signed value overflows when n is the most negative integer. */
 	unsigned long int u = (unsigned long int)n;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - u;
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		if (u >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Raise the complex value x to the integer power n by square-and-multiply.
    n == 0 yields 1; a negative n computes (1/x) raised to |n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	/* The exponent magnitude is formed in unsigned arithmetic: negating n
 	   as a signed value overflows when n is the most negative integer. */
 	unsigned long int u = (unsigned long int)n;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - u;
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		if (u >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Integer power x**n with truncating integer arithmetic.
    n == 0 or x == 1 gives 1; for n < 0, x == -1 gives +1 or -1 by the
    parity of n and any |x| > 1 gives 0. */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		/* (-1)**n depends only on whether n is odd, so n never has to be
 		   negated (which would overflow for the most negative integer). */
 		if (x == -1) return (n % 2 != 0) ? -1 : 1;
 		/* NOTE(review): x == 0 with n < 0 still evaluates 1/x, an integer
 		   division by zero, exactly as before -- presumably deliberate so
 		   the invalid power faults instead of returning a value; confirm. */
 		return x == 0 ? 1/x : 0;
 	}
 	for (u = n; ; ) {
 		if (u & 01) result *= x;
 		if (u >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Position of the first largest element among w[s-1] .. w[e-1], counted
    from 1 relative to s.  w[s-1] is read even when e < s, in which case
    the answer is 1.  The n argument is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		/* strict comparison: earlier elements win ties */
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the first largest element among w[s-1] .. w[e-1], counted
    from 1 relative to s.  w[s-1] is read even when e < s, in which case
    the answer is 1.  The n argument is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		/* strict comparison: earlier elements win ties */
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum over i of conj(x(i)) * y(i), n terms.
    incx_ / incy_ are the element strides.  A negative stride walks its
    vector backwards starting from element (n-1)*|inc|, following the BLAS
    convention, so no element before the array start is touched.
    n <= 0 stores zero. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product: *z = sum over i of conj(x(i)) * y(i), n terms.
    incx_ / incy_ are the element strides.  A negative stride walks its
    vector backwards starting from element (n-1)*|inc|, following the BLAS
    convention, so no element before the array start is touched.
    n <= 0 stores zero. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over i of x(i) * y(i), n terms.
    incx_ / incy_ are the element strides.  A negative stride walks its
    vector backwards starting from element (n-1)*|inc|, following the BLAS
    convention, so no element before the array start is touched.
    n <= 0 stores zero. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over i of x(i) * y(i), n terms.
    incx_ / incy_ are the element strides.  A negative stride walks its
    vector backwards starting from element (n-1)*|inc|, following the BLAS
    convention, so no element before the array start is touched.
    n <= 0 stores zero. */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SLA_GEAMV computes a matrix-vector product using a general matrix to calculate error bounds. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SLA_GEAMV + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sla_geamv.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sla_geamv.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sla_geamv.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       SUBROUTINE SLA_GEAMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, */
 /*                              Y, INCY ) */

 /*       REAL               ALPHA, BETA */
 /*       INTEGER            INCX, INCY, LDA, M, N, TRANS */
 /*       REAL               A( LDA, * ), X( * ), Y( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SLA_GEAMV  performs one of the matrix-vector operations */
 /* > */
 /* >         y := alpha*abs(A)*abs(x) + beta*abs(y), */
 /* >    or   y := alpha*abs(A)**T*abs(x) + beta*abs(y), */
 /* > */
 /* > where alpha and beta are scalars, x and y are vectors and A is an */
 /* > m by n matrix. */
 /* > */
 /* > This function is primarily used in calculating error bounds. */
 /* > To protect against underflow during evaluation, components in */
 /* > the resulting vector are perturbed away from zero by (N+1) */
 /* > times the underflow threshold.  To prevent unnecessarily large */
 /* > errors for block-structure embedded in general matrices, */
 /* > "symbolically" zero components are not perturbed.  A zero */
 /* > entry is considered "symbolic" if all multiplications involved */
 /* > in computing that entry have at least one zero multiplicand. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is INTEGER */
 /* >           On entry, TRANS specifies the operation to be performed as */
 /* >           follows: */
 /* > */
 /* >             BLAS_NO_TRANS      y := alpha*abs(A)*abs(x) + beta*abs(y) */
 /* >             BLAS_TRANS         y := alpha*abs(A**T)*abs(x) + beta*abs(y) */
 /* >             BLAS_CONJ_TRANS    y := alpha*abs(A**T)*abs(x) + beta*abs(y) */
 /* > */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] M */
 /* > \verbatim */
 /* >          M is INTEGER */
 /* >           On entry, M specifies the number of rows of the matrix A. */
 /* >           M must be at least zero. */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >           On entry, N specifies the number of columns of the matrix A. */
 /* >           N must be at least zero. */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] ALPHA */
 /* > \verbatim */
 /* >          ALPHA is REAL */
 /* >           On entry, ALPHA specifies the scalar alpha. */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension ( LDA, n ) */
 /* >           Before entry, the leading m by n part of the array A must */
 /* >           contain the matrix of coefficients. */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >           On entry, LDA specifies the first dimension of A as declared */
 /* >           in the calling (sub) program. LDA must be at least */
 /* >           max( 1, m ). */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] X */
 /* > \verbatim */
 /* >          X is REAL array, dimension */
 /* >           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
 /* >           and at least */
 /* >           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
 /* >           Before entry, the incremented array X must contain the */
 /* >           vector x. */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] INCX */
 /* > \verbatim */
 /* >          INCX is INTEGER */
 /* >           On entry, INCX specifies the increment for the elements of */
 /* >           X. INCX must not be zero. */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] BETA */
 /* > \verbatim */
 /* >          BETA is REAL */
 /* >           On entry, BETA specifies the scalar beta. When BETA is */
 /* >           supplied as zero then Y need not be set on input. */
 /* >           Unchanged on exit. */
 /* > \endverbatim */
 /* > */
 /* > \param[in,out] Y */
 /* > \verbatim */
 /* >          Y is REAL array, */
 /* >           dimension at least */
 /* >           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
 /* >           and at least */
 /* >           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
 /* >           Before entry with BETA non-zero, the incremented array Y */
 /* >           must contain the vector y. On exit, Y is overwritten by the */
 /* >           updated vector y. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] INCY */
 /* > \verbatim */
 /* >          INCY is INTEGER */
 /* >           On entry, INCY specifies the increment for the elements of */
 /* >           Y. INCY must not be zero. */
 /* >           Unchanged on exit. */
 /* > */
 /* >  Level 2 Blas routine. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date June 2017 */

 /* > \ingroup realGEcomputational */

 /*  ===================================================================== */
 /* Subroutine */ int sla_geamv_(integer *trans, integer *m, integer *n, real
 	*alpha, real *a, integer *lda, real *x, integer *incx, real *beta,
 	real *y, integer *incy)
 {
     extern integer ilatrans_(char *);
     extern real slamch_(char *);
     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);

     /* Leading dimension of A, used for (row, col) addressing. */
     const integer a_ld = *lda;

     integer info = 0;
     integer x_len, y_len, x_first, y_first;
     integer out, in, x_pos, y_pos;
     logical no_trans, symbolic_zero;
     real guard, a_mag, x_val, y_val;

     /* Rebase the arrays so they can be indexed from 1, as in the
        Fortran original: A(r, c) is a[r + c * a_ld]. */
     a -= 1 + a_ld;
     --x;
     --y;

     /* Argument checks; info carries the 1-based position of the first
        offending argument. */
     if (*trans != ilatrans_("N") && *trans != ilatrans_("T") && *trans != ilatrans_("C")) {
         info = 1;
     } else if (*m < 0) {
         info = 2;
     } else if (*n < 0) {
         info = 3;
     } else if (*lda < f2cmax(1,*m)) {
         info = 6;
     } else if (*incx == 0) {
         info = 8;
     } else if (*incy == 0) {
         info = 11;
     }
     if (info != 0) {
         xerbla_("SLA_GEAMV ", &info, (ftnlen)10);
         return 0;
     }

     /* Nothing to do for an empty matrix, or when y would be left as is. */
     if (*m == 0 || *n == 0 || (*alpha == 0.f && *beta == 1.f)) {
         return 0;
     }

     /* Vector lengths depend on whether A or its transpose is applied. */
     no_trans = (*trans == ilatrans_("N"));
     if (no_trans) {
         x_len = *n;
         y_len = *m;
     } else {
         x_len = *m;
         y_len = *n;
     }

     /* With a negative increment the traversal starts at the far end. */
     x_first = (*incx > 0) ? 1 : 1 - (x_len - 1) * *incx;
     y_first = (*incy > 0) ? 1 : 1 - (y_len - 1) * *incy;

     /* Perturbation size: (n + 1) times the safe minimum. */
     guard = slamch_("Safe minimum");
     guard = (*n + 1) * guard;

     /* Form y := alpha*abs(A)*abs(x) + beta*abs(y), or the transposed
        variant, one output element at a time.  A single strided walk over
        x covers the unit-increment case too, since x_first is 1 and the
        step is 1 there. */
     y_pos = y_first;
     for (out = 1; out <= y_len; ++out) {
         /* Scale the incoming y element; it counts as symbolically zero
            when beta is zero or the element itself is zero. */
         if (*beta == 0.f) {
             symbolic_zero = TRUE_;
             y[y_pos] = 0.f;
         } else if (y[y_pos] == 0.f) {
             symbolic_zero = TRUE_;
         } else {
             symbolic_zero = FALSE_;
             y_val = y[y_pos];
             y[y_pos] = *beta * abs(y_val);
         }

         if (*alpha != 0.f) {
             x_pos = x_first;
             for (in = 1; in <= x_len; ++in) {
                 /* A(out, in) without transpose, A(in, out) with it. */
                 a_mag = no_trans ? a[out + in * a_ld] : a[in + out * a_ld];
                 a_mag = abs(a_mag);
                 /* Stays symbolic only while every product has a zero
                    factor. */
                 symbolic_zero = symbolic_zero && (x[x_pos] == 0.f || a_mag ==
                         0.f);
                 x_val = x[x_pos];
                 y[y_pos] += *alpha * abs(x_val) * a_mag;
                 x_pos += *incx;
             }
         }

         /* Push non-symbolic results away from zero by the guard amount,
            in the direction of their sign. */
         if (! symbolic_zero) {
             y[y_pos] += r_sign(&guard, &y[y_pos]);
         }
         y_pos += *incy;
     }

     return 0;

 /*     End of SLA_GEAMV */

 } /* sla_geamv__ */

--- a/lapack-netlib/SRC/sla_gercond.c
+++ b/lapack-netlib/SRC/sla_gercond.c
@@ -0,0 +1,735 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 /* <complex.h> may define `complex` and `I` as macros; drop them so the
    names can be used for the struct type and for ordinary identifiers.
    (#undef of a name that is not a macro is a no-op.) */
 #undef complex
 #undef I

 /* Fortran scalar types as C types. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;

 /* Complex numbers stored as (real, imaginary) pairs. */
 typedef struct {
 	real r;
 	real i;
 } complex;
 typedef struct {
 	doublereal r;
 	doublereal i;
 } doublecomplex;

 /* Read a pair struct as a native C complex value. */
 static inline _Complex float Cf(complex *z) {
 	_Complex float value = z->r + z->i*_Complex_I;
 	return value;
 }
 static inline _Complex double Cd(doublecomplex *z) {
 	_Complex double value = z->r + z->i*_Complex_I;
 	return value;
 }

 /* Reinterpret a pair struct pointer as a pointer to native complex. */
 static inline _Complex float * _pCf(complex *z) {
 	return (_Complex float*)z;
 }
 static inline _Complex double * _pCd(doublecomplex *z) {
 	return (_Complex double*)z;
 }

 /* Assignable native-complex view of the struct that z points to. */
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))

 /* Fortran LOGICAL and small integer kinds. */
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #if !defined(Extern)
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer aliases used by the I/O control blocks below. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* external read, write */
 typedef struct {
 	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /* internal read, write */
 typedef struct {
 	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /* open */
 typedef struct {
 	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /* close */
 typedef struct {
 	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /* rewind, backspace, endfile */
 typedef struct {
 	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire -- parameters in standard's order from inex onwards */
 typedef struct {
 	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint *inex;
 	ftnint *inopen;
 	ftnint *innum;
 	ftnint *innamed;
 	char *inname;
 	ftnlen innamlen;
 	char *inacc;
 	ftnlen inacclen;
 	char *inseq;
 	ftnlen inseqlen;
 	char *indir;
 	ftnlen indirlen;
 	char *infmt;
 	ftnlen infmtlen;
 	char *inform;
 	ftnint informlen;	/* declared ftnint, unlike the other *len fields */
 	char *inunf;
 	ftnlen inunflen;
 	ftnint *inrecl;
 	ftnint *innrec;
 	char *inblank;
 	ftnlen inblanklen;
 } inlist;

 #define VOID void

 /* for multiple entry points: one slot per scalar kind */
 typedef union Multitype {
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 } Multitype;

 /* for Namelist: one described variable */
 typedef struct Vardesc {
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 } Vardesc;

 /* A named group of nvars variable descriptors. */
 typedef struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 } Namelist;

 /* Elementary helpers.  These are function-like macros, so an argument may
    be evaluated more than once: pass operands without side effects. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Inline replacements for the libf2c runtime entry points.  Arguments are
    pointers, following the Fortran calling convention; the brace-wrapped
    forms are statements, the parenthesised forms are expressions. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* Both operands are doublecomplex pointers, so both go through Cd();
    dereferencing B directly would hand a struct to cpow(). */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 #define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 #define myexit_() break;
 #define mycycle() continue;
 /* The three helpers below yield values, so they expand to parenthesised
    expressions; a brace-wrapped body could only be used as an initializer. */
 #define myceiling(w) (ceil(w))
 #define myhuge(w) (HUGE_VAL)
 /* Always dispatches to the double-precision search, dmaxloc_. */
 #define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* Pointer to a function returning logical; the parameter list is left
    open, spelled the way each language requires. */
 #ifndef __cplusplus
 typedef logical (*L_fp)();
 #else
 typedef logical (*L_fp)(...);
 #endif

 /* Raise x to the integer power n by square-and-multiply.
    n == 0 yields 1; a negative n computes (1/x) raised to |n|. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	/* The exponent magnitude is formed in unsigned arithmetic: negating n
 	   as a signed value overflows when n is the most negative integer. */
 	unsigned long int u = (unsigned long int)n;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - u;
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		if (u >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Raise x to the integer power n by square-and-multiply.
    n == 0 yields 1; a negative n computes (1/x) raised to |n|. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	/* The exponent magnitude is formed in unsigned arithmetic: negating n
 	   as a signed value overflows when n is the most negative integer. */
 	unsigned long int u = (unsigned long int)n;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - u;
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		if (u >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Raise the complex value x to the integer power n by square-and-multiply.
    n == 0 yields 1; a negative n computes (1/x) raised to |n|. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	/* The exponent magnitude is formed in unsigned arithmetic: negating n
 	   as a signed value overflows when n is the most negative integer. */
 	unsigned long int u = (unsigned long int)n;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - u;
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		if (u >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Raise the complex value x to the integer power n by square-and-multiply.
    n == 0 yields 1; a negative n computes (1/x) raised to |n|. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	/* The exponent magnitude is formed in unsigned arithmetic: negating n
 	   as a signed value overflows when n is the most negative integer. */
 	unsigned long int u = (unsigned long int)n;
 	if (n == 0) return result;
 	if (n < 0) {
 		u = 0UL - u;
 		x = 1/x;
 	}
 	for (;;) {
 		if (u & 01) result *= x;
 		if (u >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Integer power x**n with truncating integer arithmetic.
    n == 0 or x == 1 gives 1; for n < 0, x == -1 gives +1 or -1 by the
    parity of n and any |x| > 1 gives 0. */
 static integer pow_ii(integer x, integer n) {
 	integer result = 1;
 	unsigned long int u;
 	if (n <= 0) {
 		if (n == 0 || x == 1) return 1;
 		/* (-1)**n depends only on whether n is odd, so n never has to be
 		   negated (which would overflow for the most negative integer). */
 		if (x == -1) return (n % 2 != 0) ? -1 : 1;
 		/* NOTE(review): x == 0 with n < 0 still evaluates 1/x, an integer
 		   division by zero, exactly as before -- presumably deliberate so
 		   the invalid power faults instead of returning a value; confirm. */
 		return x == 0 ? 1/x : 0;
 	}
 	for (u = n; ; ) {
 		if (u & 01) result *= x;
 		if (u >>= 1) x *= x;
 		else break;
 	}
 	return result;
 }
 /* Position of the first largest element among w[s-1] .. w[e-1], counted
    from 1 relative to s.  w[s-1] is read even when e < s, in which case
    the answer is 1.  The n argument is not used. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	double top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		/* strict comparison: earlier elements win ties */
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the first largest element among w[s-1] .. w[e-1], counted
    from 1 relative to s.  w[s-1] is read even when e < s, in which case
    the answer is 1.  The n argument is not used. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	integer k;
 	float top = w[s-1];
 	for (k = s + 1; k <= e; ++k) {
 		/* strict comparison: earlier elements win ties */
 		if (w[k-1] > top) {
 			best = k;
 			top = w[k-1];
 		}
 	}
 	return best - s + 1;
 }
 /* Conjugated dot product: *z = sum over i of conj(x(i)) * y(i), n terms.
    incx_ / incy_ are the element strides.  A negative stride walks its
    vector backwards starting from element (n-1)*|inc|, following the BLAS
    convention, so no element before the array start is touched.
    n <= 0 stores zero. */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /* Conjugated dot product: *z = sum over i of conj(x(i)) * y(i), n terms.
    incx_ / incy_ are the element strides.  A negative stride walks its
    vector backwards starting from element (n-1)*|inc|, following the BLAS
    convention, so no element before the array start is touched.
    n <= 0 stores zero. */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCd(z) = acc;
 }
 /* Unconjugated dot product: *z = sum over i of x(i) * y(i), n terms.
    incx_ / incy_ are the element strides.  A negative stride walks its
    vector backwards starting from element (n-1)*|inc|, following the BLAS
    convention, so no element before the array start is touched.
    n <= 0 stores zero. */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);
 		ix += incx;
 		iy += incy;
 	}
 	pCf(z) = acc;
 }
 /*
  * Unconjugated double-precision complex dot product:
  *     *z = sum over k = 0 .. n-1 of x_k * y_k
  *
  *   z       receives the result
  *   n_      number of elements; the result is 0 when *n_ <= 0
  *   x, y    input vectors
  *   incx_,  element strides of x and y.  Following the BLAS convention, a
  *   incy_   negative stride walks the vector backwards: the pointer still
  *           addresses the lowest element and traversal starts at offset
  *           (1 - n) * inc.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* Start at the far end for negative strides so every index stays inside the vector. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* Table of constant values */

 /* The constant 1, held in a variable so that its address can be passed as
    the third argument of sgetrs_. */
 static integer c__1 = 1;

 /* > \brief \b SLA_GERCOND estimates the Skeel condition number for a general matrix. */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SLA_GERCOND + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sla_gercond.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sla_gercond.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sla_gercond.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       REAL FUNCTION SLA_GERCOND ( TRANS, N, A, LDA, AF, LDAF, IPIV, */
 /*                                   CMODE, C, INFO, WORK, IWORK ) */

 /*       CHARACTER          TRANS */
 /*       INTEGER            N, LDA, LDAF, INFO, CMODE */
 /*       INTEGER            IPIV( * ), IWORK( * ) */
 /*       REAL               A( LDA, * ), AF( LDAF, * ), WORK( * ), */
 /*      $                   C( * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* >    SLA_GERCOND estimates the Skeel condition number of op(A) * op2(C) */
 /* >    where op2 is determined by CMODE as follows */
 /* >    CMODE =  1    op2(C) = C */
 /* >    CMODE =  0    op2(C) = I */
 /* >    CMODE = -1    op2(C) = inv(C) */
 /* >    The Skeel condition number cond(A) = norminf( |inv(A)||A| ) */
 /* >    is computed by computing scaling factors R such that */
 /* >    diag(R)*A*op2(C) is row equilibrated and computing the standard */
 /* >    infinity-norm condition number. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] TRANS */
 /* > \verbatim */
 /* >          TRANS is CHARACTER*1 */
 /* >     Specifies the form of the system of equations: */
 /* >       = 'N':  A * X = B     (No transpose) */
 /* >       = 'T':  A**T * X = B  (Transpose) */
 /* >       = 'C':  A**H * X = B  (Conjugate Transpose = Transpose) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >     The number of linear equations, i.e., the order of the */
 /* >     matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >     On entry, the N-by-N matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >     The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AF */
 /* > \verbatim */
 /* >          AF is REAL array, dimension (LDAF,N) */
 /* >     The factors L and U from the factorization */
 /* >     A = P*L*U as computed by SGETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAF */
 /* > \verbatim */
 /* >          LDAF is INTEGER */
 /* >     The leading dimension of the array AF.  LDAF >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] IPIV */
 /* > \verbatim */
 /* >          IPIV is INTEGER array, dimension (N) */
 /* >     The pivot indices from the factorization A = P*L*U */
 /* >     as computed by SGETRF; row i of the matrix was interchanged */
 /* >     with row IPIV(i). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] CMODE */
 /* > \verbatim */
 /* >          CMODE is INTEGER */
 /* >     Determines op2(C) in the formula op(A) * op2(C) as follows: */
 /* >     CMODE =  1    op2(C) = C */
 /* >     CMODE =  0    op2(C) = I */
 /* >     CMODE = -1    op2(C) = inv(C) */
 /* > \endverbatim */
 /* > */
 /* > \param[in] C */
 /* > \verbatim */
 /* >          C is REAL array, dimension (N) */
 /* >     The vector C in the formula op(A) * op2(C). */
 /* > \endverbatim */
 /* > */
 /* > \param[out] INFO */
 /* > \verbatim */
 /* >          INFO is INTEGER */
 /* >       = 0:  Successful exit. */
 /* >     < 0:  If INFO = -i, the ith argument is invalid. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] WORK */
 /* > \verbatim */
 /* >          WORK is REAL array, dimension (3*N). */
 /* >     Workspace. */
 /* > \endverbatim */
 /* > */
 /* > \param[out] IWORK */
 /* > \verbatim */
 /* >          IWORK is INTEGER array, dimension (N). */
 /* >     Workspace. */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /*  ===================================================================== */
 /* Estimates the Skeel condition number of op(A) * op2(C) from the LU factors
    AF/IPIV of A.  The value returned is the reciprocal of the norm estimate
    produced by slacn2_: 0 if that estimate is 0 or an argument is rejected,
    1 if N == 0.  WORK is used as three consecutive length-N segments: the
    slacn2_ vector, its scratch vector, and the row-scaling factors R. */
 real sla_gercond_(char *trans, integer *n, real *a, integer *lda, real *af, 
 	integer *ldaf, integer *ipiv, integer *cmode, real *c__, integer *
 	info, real *work, integer *iwork)
 {
     /* External routines */
     extern logical lsame_(char *, char *);
     extern /* Subroutine */ int slacn2_(integer *, real *, real *, integer *, 
 	    real *, integer *, integer *), xerbla_(char *, integer *, ftnlen);
     extern /* Subroutine */ int sgetrs_(char *, integer *, integer *, real *, 
 	    integer *, integer *, real *, integer *, integer *);

     /* Local variables */
     integer a_dim1, a_offset, af_dim1, af_offset, argno;
     integer row, col, k, kase;
     integer isave[3];
     real rowsum, term, ainvnm;
     logical notrans, plain_solve;

     /* Parameter adjustments: shift every array so it can be indexed from 1. */
     a_dim1 = *lda;
     a_offset = 1 + a_dim1 * 1;
     a -= a_offset;
     af_dim1 = *ldaf;
     af_offset = 1 + af_dim1 * 1;
     af -= af_offset;
     --ipiv;
     --c__;
     --work;
     --iwork;

     /* Argument checks; the first offending argument is reported. */
     *info = 0;
     notrans = lsame_(trans, "N");
     if (! notrans && ! lsame_(trans, "T") && ! lsame_(trans, "C")) {
 	*info = -1;
     } else if (*n < 0) {
 	*info = -2;
     } else if (*lda < f2cmax(1,*n)) {
 	*info = -4;
     } else if (*ldaf < f2cmax(1,*n)) {
 	*info = -6;
     }
     if (*info != 0) {
 	argno = -(*info);
 	xerbla_("SLA_GERCOND", &argno, (ftnlen)11);
 	return 0.f;
     }
     if (*n == 0) {
 	return 1.f;
     }

     /* Scaling factors R: for each row of op(A), the sum of the absolute
        values of its entries after applying op2(C).  Stored in the third
        segment of WORK. */
     for (row = 1; row <= *n; ++row) {
 	rowsum = 0.f;
 	for (col = 1; col <= *n; ++col) {
 	    term = notrans ? a[row + col * a_dim1] : a[col + row * a_dim1];
 	    if (*cmode == 1) {
 		term *= c__[col];
 	    } else if (*cmode != 0) {
 		term /= c__[col];
 	    }
 	    rowsum += abs(term);
 	}
 	work[(*n << 1) + row] = rowsum;
     }

     /* Estimate the norm of inv(op(A)) by reverse communication with
        slacn2_, which sets kase to 0 once it has finished. */
     ainvnm = 0.f;
     kase = 0;
     for (;;) {
 	slacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave);
 	if (kase == 0) {
 	    break;
 	}

 	if (kase == 2) {
 	    /* Multiply by R. */
 	    for (k = 1; k <= *n; ++k) {
 		work[k] *= work[(*n << 1) + k];
 	    }
 	    plain_solve = notrans;
 	} else {
 	    /* Multiply by inv(C**T). */
 	    if (*cmode == 1) {
 		for (k = 1; k <= *n; ++k) {
 		    work[k] /= c__[k];
 		}
 	    } else if (*cmode == -1) {
 		for (k = 1; k <= *n; ++k) {
 		    work[k] *= c__[k];
 		}
 	    }
 	    plain_solve = ! notrans;
 	}

 	/* Solve with the LU factors; kase == 2 uses op(A) itself, any other
 	   kase uses its transpose. */
 	if (plain_solve) {
 	    sgetrs_("No transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[1],
 		    &work[1], n, info);
 	} else {
 	    sgetrs_("Transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[1],
 		    &work[1], n, info);
 	}

 	if (kase == 2) {
 	    /* Multiply by inv(C). */
 	    if (*cmode == 1) {
 		for (k = 1; k <= *n; ++k) {
 		    work[k] /= c__[k];
 		}
 	    } else if (*cmode == -1) {
 		for (k = 1; k <= *n; ++k) {
 		    work[k] *= c__[k];
 		}
 	    }
 	} else {
 	    /* Multiply by R. */
 	    for (k = 1; k <= *n; ++k) {
 		work[k] *= work[(*n << 1) + k];
 	    }
 	}
     }

     /* Reciprocal of the estimate; stays 0 when the estimate itself is 0. */
     return ainvnm != 0.f ? 1.f / ainvnm : 0.f;

 } /* sla_gercond__ */

--- a/lapack-netlib/SRC/sla_gerfsx_extended.c
+++ b/lapack-netlib/SRC/sla_gerfsx_extended.c
--- a/lapack-netlib/SRC/sla_gerpvgrw.c
+++ b/lapack-netlib/SRC/sla_gerpvgrw.c
@@ -0,0 +1,542 @@
 /* f2c.h  --  Standard Fortran to C header file */

 /**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."

 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */

 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <complex.h>
 #ifdef complex
 #undef complex
 #endif
 #ifdef I
 #undef I
 #endif

 /* C spellings of the Fortran scalar types used by the translated code below. */
 typedef int integer;
 typedef unsigned int uinteger;
 typedef char *address;
 typedef short int shortint;
 typedef float real;
 typedef double doublereal;
 /* Complex numbers are stored as (real part, imaginary part) pairs. */
 typedef struct { real r, i; } complex;
 typedef struct { doublereal r, i; } doublecomplex;
 /* Cf / Cd build a C99 complex value from a pair struct. */
 static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
 static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
 /* _pCf / _pCd cast a pair-struct pointer to a pointer to the C99 complex type;
    pCf / pCd dereference that pointer, so they can be assigned to. */
 static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
 static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
 #define pCf(z) (*_pCf(z))
 #define pCd(z) (*_pCd(z))
 typedef int logical;
 typedef short int shortlogical;
 typedef char logical1;
 typedef char integer1;

 /* Values used for the logical type. */
 #define TRUE_ (1)
 #define FALSE_ (0)

 /* Extern is for use with -E */
 #ifndef Extern
 #define Extern extern
 #endif

 /* I/O stuff */

 /* Integer types used by the I/O descriptor structs below. */
 typedef int flag;
 typedef int ftnlen;
 typedef int ftnint;

 /* Each struct below is a parameter block for one kind of Fortran I/O
    statement, as named by the comment in front of it. */

 /*external read, write*/
 typedef struct
 {	flag cierr;
 	ftnint ciunit;
 	flag ciend;
 	char *cifmt;
 	ftnint cirec;
 } cilist;

 /*internal read, write*/
 typedef struct
 {	flag icierr;
 	char *iciunit;
 	flag iciend;
 	char *icifmt;
 	ftnint icirlen;
 	ftnint icirnum;
 } icilist;

 /*open*/
 typedef struct
 {	flag oerr;
 	ftnint ounit;
 	char *ofnm;
 	ftnlen ofnmlen;
 	char *osta;
 	char *oacc;
 	char *ofm;
 	ftnint orl;
 	char *oblnk;
 } olist;

 /*close*/
 typedef struct
 {	flag cerr;
 	ftnint cunit;
 	char *csta;
 } cllist;

 /*rewind, backspace, endfile*/
 typedef struct
 {	flag aerr;
 	ftnint aunit;
 } alist;

 /* inquire */
 /* Most string members (char *) are paired with a length member. */
 typedef struct
 {	flag inerr;
 	ftnint inunit;
 	char *infile;
 	ftnlen infilen;
 	ftnint	*inex;	/*parameters in standard's order*/
 	ftnint	*inopen;
 	ftnint	*innum;
 	ftnint	*innamed;
 	char	*inname;
 	ftnlen	innamlen;
 	char	*inacc;
 	ftnlen	inacclen;
 	char	*inseq;
 	ftnlen	inseqlen;
 	char 	*indir;
 	ftnlen	indirlen;
 	char	*infmt;
 	ftnlen	infmtlen;
 	char	*inform;
 	ftnint	informlen;
 	char	*inunf;
 	ftnlen	inunflen;
 	ftnint	*inrecl;
 	ftnint	*innrec;
 	char	*inblank;
 	ftnlen	inblanklen;
 } inlist;

 #define VOID void

 /* One storage cell that can hold any of the scalar types declared above. */
 union Multitype {	/* for multiple entry points */
 	integer1 g;
 	shortint h;
 	integer i;
 	/* longint j; */
 	real r;
 	doublereal d;
 	complex c;
 	doublecomplex z;
 	};

 typedef union Multitype Multitype;

 /* Description of a single variable: name, address, dimensions and a type code. */
 struct Vardesc {	/* for Namelist */
 	char *name;
 	char *addr;
 	ftnlen *dims;
 	int  type;
 	};
 typedef struct Vardesc Vardesc;

 /* A named group of nvars variable descriptors. */
 struct Namelist {
 	char *name;
 	Vardesc **vars;
 	int nvars;
 	};
 typedef struct Namelist Namelist;

 /* Generic helpers.  These are function-like macros that may evaluate an
    argument more than once, so arguments should be free of side effects.
    Note that abs() here replaces the integer-only abs() from <stdlib.h> and
    works on any arithmetic type. */
 #define abs(x) ((x) >= 0 ? (x) : -(x))
 #define dabs(x) (fabs(x))
 #define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
 #define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
 #define dmin(a,b) (f2cmin(a,b))
 #define dmax(a,b) (f2cmax(a,b))
 #define bit_test(a,b)	((a) >> (b) & 1)
 #define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
 #define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))

 /* Intrinsic-function replacements.  The c_*, z_*, d_*, r_* and i_* macros
    take POINTERS to their operands and dereference them; macros with an R
    (or c / p) first parameter store their result through that pointer. */
 #define abort_() { sig_die("Fortran abort routine called", 1); }
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
 #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
 #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
 #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
 //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
 #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
 #define d_abs(x) (fabs(*(x)))
 #define d_acos(x) (acos(*(x)))
 #define d_asin(x) (asin(*(x)))
 #define d_atan(x) (atan(*(x)))
 #define d_atn2(x, y) (atan2(*(x),*(y)))
 #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
 #define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
 #define d_cos(x) (cos(*(x)))
 #define d_cosh(x) (cosh(*(x)))
 #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
 #define d_exp(x) (exp(*(x)))
 #define d_imag(z) (cimag(Cd(z)))
 #define r_imag(z) (cimag(Cf(z)))
 /* d_int / r_int truncate toward zero. */
 #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
 /* The constant is log10(e), so these compute a base-10 logarithm. */
 #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
 #define d_log(x) (log(*(x)))
 #define d_mod(x, y) (fmod(*(x), *(y)))
 /* u_nint rounds to the nearest integer value, halves away from zero. */
 #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
 #define d_nint(x) u_nint(*(x))
 /* u_sign yields |a| when b >= 0 and -|a| otherwise. */
 #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
 #define d_sign(a,b) u_sign(*(a),*(b))
 #define r_sign(a,b) u_sign(*(a),*(b))
 #define d_sin(x) (sin(*(x)))
 #define d_sinh(x) (sinh(*(x)))
 #define d_sqrt(x) (sqrt(*(x)))
 #define d_tan(x) (tan(*(x)))
 #define d_tanh(x) (tanh(*(x)))
 #define i_abs(x) abs(*(x))
 #define i_dnnt(x) ((integer)u_nint(*(x)))
 #define i_len(s, n) (n)
 #define i_nint(x) ((integer)u_nint(*(x)))
 #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
 /* Power operators: the *_ui helpers take the base by value and an integer
    exponent; the macros dereference the pointers they are given. */
 #define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
 #define pow_si(B,E) spow_ui(*(B),*(E))
 #define pow_ri(B,E) spow_ui(*(B),*(E))
 #define pow_di(B,E) dpow_ui(*(B),*(E))
 #define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
 #define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
 /* pow_zz: store A ** B through R, where R, A and B all point to
    doublecomplex values.  B is converted with Cd() in the same way as A;
    dereferencing it directly would pass a doublecomplex struct to cpow(),
    which expects a C99 complex value. */
 #define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
 /* s_cat: copy the *(np) source strings rpp[i], with lengths rnp[i], one after
    another into lpp, which has room for llp characters; a source that does
    not fit is cut short and any remaining room is filled with blanks. */
 #define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
 /* s_cmp: strncmp over the shorter of the two lengths c and d. */
 #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
 /* s_copy: copy at most min(C,D) characters from B to A, stopping early at a
    NUL in B; the rest of A is left untouched (no blank padding). */
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 /* sig_die and s_stop ignore their arguments and terminate the process with
    exit status 1 and 0 respectively. */
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
 /* Version identification string; nothing in the code visible here reads it. */
 static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
 /* Loop-control shorthands: expand to break / continue. */
 #define myexit_() break;
 #define mycycle() continue;
 /* NOTE(review): the three macros below expand to a brace-enclosed expression
    with no terminating semicolon, so they only fit contexts where that form
    is valid (for example a braced initializer) - confirm against callers. */
 #define myceiling(w) {ceil(w)}
 #define myhuge(w) {HUGE_VAL}
 //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
 /* Always dispatches to the double-precision dmaxloc_. */
 #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}

 /* procedure parameter types for -A and -C++ */

 #define F2C_proc_par_types 1
 /* L_fp: pointer to a function returning logical.  The parameter list is left
    open: "(...)" when compiled as C++, an empty "()" declarator in C. */
 #ifdef __cplusplus
 typedef logical (*L_fp)(...);
 #else
 typedef logical (*L_fp)();
 #endif

 /* Single-precision x**n for an integer exponent, by repeated squaring.
    A negative exponent is handled by inverting x first; n == 0 gives 1. */
 static float spow_ui(float x, integer n) {
 	float result = 1.0;
 	unsigned long int bits;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	for (;;) {
 		if (bits & 01)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return result;
 }
 /* Double-precision x**n for an integer exponent, by repeated squaring.
    A negative exponent is handled by inverting x first; n == 0 gives 1. */
 static double dpow_ui(double x, integer n) {
 	double result = 1.0;
 	unsigned long int bits;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	for (;;) {
 		if (bits & 01)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return result;
 }
 /* Single-precision complex x**n for an integer exponent, by repeated
    squaring.  A negative exponent is handled by inverting x first; n == 0
    gives 1. */
 static _Complex float cpow_ui(_Complex float x, integer n) {
 	_Complex float result = 1.0;
 	unsigned long int bits;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	for (;;) {
 		if (bits & 01)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return result;
 }
 /* Double-precision complex x**n for an integer exponent, by repeated
    squaring.  A negative exponent is handled by inverting x first; n == 0
    gives 1. */
 static _Complex double zpow_ui(_Complex double x, integer n) {
 	_Complex double result = 1.0;
 	unsigned long int bits;

 	if (n == 0)
 		return result;
 	if (n < 0) {
 		n = -n;
 		x = 1/x;
 	}
 	bits = n;
 	for (;;) {
 		if (bits & 01)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return result;
 }
 /* Integer power x**n by repeated squaring.
    For n <= 0: returns 1 when n == 0 or x == 1; for any other x except -1 the
    result is 0 (x == 0 deliberately evaluates 1/x); x == -1 falls through to
    the squaring loop with the exponent negated. */
 static integer pow_ii(integer x, integer n) {
 	integer result;
 	unsigned long int bits;

 	if (n <= 0) {
 		if (n == 0 || x == 1)
 			return 1;
 		if (x != -1)
 			return x == 0 ? 1/x : 0;
 		n = -n;
 	}
 	result = 1;
 	bits = n;
 	for (;;) {
 		if (bits & 01)
 			result *= x;
 		bits >>= 1;
 		if (bits == 0)
 			break;
 		x *= x;
 	}
 	return result;
 }
 /* Position of the first largest element among w[s-1] .. w[e-1] (s and e are
    1-based, inclusive).  The result is 1-based relative to s.  n is unused. */
 static integer dmaxloc_(double *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	double best_val = w[s - 1];
 	integer k;

 	(void)n;
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k - 1] > best_val) {
 			best = k;
 			best_val = w[k - 1];
 		}
 	}
 	return best - s + 1;
 }
 /* Position of the first largest element among w[s-1] .. w[e-1] (s and e are
    1-based, inclusive).  The result is 1-based relative to s.  n is unused. */
 static integer smaxloc_(float *w, integer s, integer e, integer *n)
 {
 	integer best = s;
 	float best_val = w[s - 1];
 	integer k;

 	(void)n;
 	for (k = s + 1; k <= e; ++k) {
 		if (w[k - 1] > best_val) {
 			best = k;
 			best_val = w[k - 1];
 		}
 	}
 	return best - s + 1;
 }
 /*
  * Conjugated single-precision complex dot product:
  *     *z = sum over k = 0 .. n-1 of conj(x_k) * y_k
  *
  *   z       receives the result
  *   n_      number of elements; the result is 0 when *n_ <= 0
  *   x, y    input vectors
  *   incx_,  element strides of x and y.  Following the BLAS convention, a
  *   incy_   negative stride walks the vector backwards: the pointer still
  *           addresses the lowest element and traversal starts at offset
  *           (1 - n) * inc.
  */
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* Start at the far end for negative strides so every index stays inside the vector. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
 	}
 	pCf(z) = acc;
 }
 /*
  * Conjugated double-precision complex dot product:
  *     *z = sum over k = 0 .. n-1 of conj(x_k) * y_k
  *
  *   z       receives the result
  *   n_      number of elements; the result is 0 when *n_ <= 0
  *   x, y    input vectors
  *   incx_,  element strides of x and y.  Following the BLAS convention, a
  *   incy_   negative stride walks the vector backwards: the pointer still
  *           addresses the lowest element and traversal starts at offset
  *           (1 - n) * inc.
  */
 static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* Start at the far end for negative strides so every index stays inside the vector. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
 	}
 	pCd(z) = acc;
 }
 /*
  * Unconjugated single-precision complex dot product:
  *     *z = sum over k = 0 .. n-1 of x_k * y_k
  *
  *   z       receives the result
  *   n_      number of elements; the result is 0 when *n_ <= 0
  *   x, y    input vectors
  *   incx_,  element strides of x and y.  Following the BLAS convention, a
  *   incy_   negative stride walks the vector backwards: the pointer still
  *           addresses the lowest element and traversal starts at offset
  *           (1 - n) * inc.
  */
 static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* Start at the far end for negative strides so every index stays inside the vector. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex float acc = 0.0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += Cf(&x[ix]) * Cf(&y[iy]);
 	}
 	pCf(z) = acc;
 }
 /*
  * Unconjugated double-precision complex dot product:
  *     *z = sum over k = 0 .. n-1 of x_k * y_k
  *
  *   z       receives the result
  *   n_      number of elements; the result is 0 when *n_ <= 0
  *   x, y    input vectors
  *   incx_,  element strides of x and y.  Following the BLAS convention, a
  *   incy_   negative stride walks the vector backwards: the pointer still
  *           addresses the lowest element and traversal starts at offset
  *           (1 - n) * inc.
  */
 static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 	/* Start at the far end for negative strides so every index stays inside the vector. */
 	integer ix = incx < 0 ? (1 - n) * incx : 0;
 	integer iy = incy < 0 ? (1 - n) * incy : 0;
 	_Complex double acc = 0.0;
 	for (i = 0; i < n; i++, ix += incx, iy += incy) {
 		acc += Cd(&x[ix]) * Cd(&y[iy]);
 	}
 	pCd(z) = acc;
 }
 #endif
 /*  -- translated by f2c (version 20000121).
   You must link the resulting object file with the libraries:
 	-lf2c -lm   (in that order)
 */



 /* > \brief \b SLA_GERPVGRW */

 /*  =========== DOCUMENTATION =========== */

 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */

 /* > \htmlonly */
 /* > Download SLA_GERPVGRW + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sla_gerpvgrw.f"> */
 /* > [TGZ]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sla_gerpvgrw.f"> */
 /* > [ZIP]</a> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sla_gerpvgrw.f"> */
 /* > [TXT]</a> */
 /* > \endhtmlonly */

 /*  Definition: */
 /*  =========== */

 /*       REAL FUNCTION SLA_GERPVGRW( N, NCOLS, A, LDA, AF, LDAF ) */

 /*       INTEGER            N, NCOLS, LDA, LDAF */
 /*       REAL               A( LDA, * ), AF( LDAF, * ) */


 /* > \par Purpose: */
 /*  ============= */
 /* > */
 /* > \verbatim */
 /* > */
 /* > SLA_GERPVGRW computes the reciprocal pivot growth factor */
 /* > norm(A)/norm(U). The "max absolute element" norm is used. If this is */
 /* > much less than 1, the stability of the LU factorization of the */
 /* > (equilibrated) matrix A could be poor. This also means that the */
 /* > solution X, estimated condition numbers, and error bounds could be */
 /* > unreliable. */
 /* > \endverbatim */

 /*  Arguments: */
 /*  ========== */

 /* > \param[in] N */
 /* > \verbatim */
 /* >          N is INTEGER */
 /* >     The number of linear equations, i.e., the order of the */
 /* >     matrix A.  N >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] NCOLS */
 /* > \verbatim */
 /* >          NCOLS is INTEGER */
 /* >     The number of columns of the matrix A. NCOLS >= 0. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] A */
 /* > \verbatim */
 /* >          A is REAL array, dimension (LDA,N) */
 /* >     On entry, the N-by-N matrix A. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDA */
 /* > \verbatim */
 /* >          LDA is INTEGER */
 /* >     The leading dimension of the array A.  LDA >= f2cmax(1,N). */
 /* > \endverbatim */
 /* > */
 /* > \param[in] AF */
 /* > \verbatim */
 /* >          AF is REAL array, dimension (LDAF,N) */
 /* >     The factors L and U from the factorization */
 /* >     A = P*L*U as computed by SGETRF. */
 /* > \endverbatim */
 /* > */
 /* > \param[in] LDAF */
 /* > \verbatim */
 /* >          LDAF is INTEGER */
 /* >     The leading dimension of the array AF.  LDAF >= f2cmax(1,N). */
 /* > \endverbatim */

 /*  Authors: */
 /*  ======== */

 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */

 /* > \date December 2016 */

 /* > \ingroup realGEcomputational */

 /*  ===================================================================== */
 /* Reciprocal pivot growth factor.  For each of the first NCOLS columns the
    largest absolute entry of A (rows 1..N) is divided by the largest absolute
    entry of AF on or above the diagonal; the smallest such ratio, capped at
    1, is returned.  Columns whose AF maximum is 0 are skipped. */
 real sla_gerpvgrw_(integer *n, integer *ncols, real *a, integer *lda, real *
 	af, integer *ldaf)
 {
     integer col, row;
     real a_peak, u_peak, mag, ratio;
     real growth = 1.f;

     /* Zero-based indexing: element (row, col) lives at row + col * ld. */
     for (col = 0; col < *ncols; ++col) {
 	a_peak = 0.f;
 	for (row = 0; row < *n; ++row) {
 	    mag = a[row + col * *lda];
 	    mag = abs(mag);
 	    if (mag >= a_peak) {
 		a_peak = mag;
 	    }
 	}

 	/* Only rows up to and including the diagonal of AF are scanned. */
 	u_peak = 0.f;
 	for (row = 0; row <= col; ++row) {
 	    mag = af[row + col * *ldaf];
 	    mag = abs(mag);
 	    if (mag >= u_peak) {
 		u_peak = mag;
 	    }
 	}

 	if (u_peak != 0.f) {
 	    ratio = a_peak / u_peak;
 	    if (ratio <= growth) {
 		growth = ratio;
 	    }
 	}
     }
     return growth;
 } /* sla_gerpvgrw__ */